[TF2XLA] [NFC] Break apart the [TF2XLA/MLIR] -> xla_compiler dependency edge

This is needed for invoking the MLIR tf2xla bridge from xla_compiler.

This CL breaks apart items from xla_compiler into individual build targets,
which are then depended on from the MLIR TF bridge.

PiperOrigin-RevId: 323640340
Change-Id: I78b972503db9e7b5254014ca7e889005490d8339
This commit is contained in:
George Karpenkov 2020-07-28 13:23:04 -07:00 committed by TensorFlower Gardener
parent 5198b44674
commit bcfb60d0a1
30 changed files with 668 additions and 388 deletions

View File

@ -308,6 +308,8 @@ cc_library(
], ],
deps = [ deps = [
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_context",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/core:framework", "//tensorflow/core:framework",
], ],
alwayslink = 1, alwayslink = 1,

View File

@ -95,6 +95,7 @@ cc_library(
":xla_kernel_creator", # buildcleaner: keep ":xla_kernel_creator", # buildcleaner: keep
"//tensorflow/compiler/jit/kernels:xla_ops", "//tensorflow/compiler/jit/kernels:xla_ops",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops",
"//tensorflow/compiler/xla/service:cpu_plugin", # buildcleaner: keep "//tensorflow/compiler/xla/service:cpu_plugin", # buildcleaner: keep
"//tensorflow/core:core_cpu_internal", "//tensorflow/core:core_cpu_internal",
@ -115,6 +116,7 @@ cc_library(
":xla_kernel_creator", # buildcleaner: keep ":xla_kernel_creator", # buildcleaner: keep
"//tensorflow/compiler/jit/kernels:xla_ops", "//tensorflow/compiler/jit/kernels:xla_ops",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops",
"//tensorflow/compiler/xla/service:gpu_plugin", # buildcleaner: keep "//tensorflow/compiler/xla/service:gpu_plugin", # buildcleaner: keep
"//tensorflow/core:core_cpu_internal", "//tensorflow/core:core_cpu_internal",
@ -172,6 +174,7 @@ XLA_DEVICE_DEPS = [
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:tf2xla_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops",
"//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:client_library",
@ -343,6 +346,7 @@ cc_library(
"//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_no_tf_dialect_passes", "//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_no_tf_dialect_passes",
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_context",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/client:client_library", "//tensorflow/compiler/xla/client:client_library",
@ -406,6 +410,7 @@ cc_library(
":compilation_passes", ":compilation_passes",
"//tensorflow/compiler/jit/kernels:xla_ops_no_jit_rewrite_registration", "//tensorflow/compiler/jit/kernels:xla_ops_no_jit_rewrite_registration",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/core:core_cpu_internal", "//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework", "//tensorflow/core:framework",
"//tensorflow/core:lib", "//tensorflow/core:lib",
@ -641,6 +646,7 @@ cc_library(
"//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:side_effect_util",
"//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:tf2xla_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/cc:xla_jit_ops", "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
"//tensorflow/compiler/tf2xla/cc:xla_ops", "//tensorflow/compiler/tf2xla/cc:xla_ops",
"//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:status_macros",
@ -700,6 +706,7 @@ cc_library(
hdrs = ["device_util.h"], hdrs = ["device_util.h"],
deps = [ deps = [
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",
"//tensorflow/core:framework", "//tensorflow/core:framework",
@ -914,6 +921,7 @@ cc_library(
"//tensorflow/compiler/jit/graphcycles", "//tensorflow/compiler/jit/graphcycles",
"//tensorflow/compiler/tf2xla:resource_operation_table", "//tensorflow/compiler/tf2xla:resource_operation_table",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:util",
"//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu",

View File

@ -21,6 +21,7 @@ XLA_OPS_DEPS = [
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:tf2xla_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:executable_run_options",
"//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",

View File

@ -150,6 +150,7 @@ tf_cc_binary(
"//tensorflow/compiler/mlir/tensorflow:translate_registration", "//tensorflow/compiler/mlir/tensorflow:translate_registration",
"//tensorflow/compiler/mlir/tensorflow:translate_tf_dialect_op", "//tensorflow/compiler/mlir/tensorflow:translate_tf_dialect_op",
"//tensorflow/compiler/mlir/xla:xla_mlir_translate", "//tensorflow/compiler/mlir/xla:xla_mlir_translate",
"//tensorflow/core:framework",
"//tensorflow/core:lib", "//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//tensorflow/core:tensorflow", "//tensorflow/core:tensorflow",

View File

@ -1477,10 +1477,13 @@ COMPILE_MLIR_UTIL_DEPS = [
"//tensorflow/compiler/mlir/xla:xla_legalize_tf", "//tensorflow/compiler/mlir/xla:xla_legalize_tf",
"//tensorflow/compiler/mlir/xla:xla_legalize_tf_with_tf2xla", "//tensorflow/compiler/mlir/xla:xla_legalize_tf_with_tf2xla",
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/tf2xla:xla_argument",
"//tensorflow/compiler/xla/client:xla_computation",
"//tensorflow/core/common_runtime:core_cpu_internal",
"//tensorflow/core/platform:logging",
"//tensorflow/core:framework", "//tensorflow/core:framework",
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",
"//tensorflow/core/platform:logging",
"//tensorflow/stream_executor/lib", "//tensorflow/stream_executor/lib",
"//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla:xla_data_proto_cc",
"//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo",

View File

@ -83,7 +83,7 @@ Status ParseMlirModule(llvm::StringRef mlir_module_string,
Status GetXlaInputShapes( Status GetXlaInputShapes(
mlir::ModuleOp module, llvm::ArrayRef<TensorShape> arg_shapes, mlir::ModuleOp module, llvm::ArrayRef<TensorShape> arg_shapes,
bool use_tuple_args, bool use_tuple_args,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
std::vector<xla::Shape>* xla_input_shapes) { std::vector<xla::Shape>* xla_input_shapes) {
xla_input_shapes->clear(); xla_input_shapes->clear();
@ -135,9 +135,8 @@ Status GetXlaInputShapes(
// output based on static shapes in MLIR module // output based on static shapes in MLIR module
Status GetOutputInfo( Status GetOutputInfo(
mlir::ModuleOp module, mlir::ModuleOp module,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
xla::Shape* xla_output_shape, xla::Shape* xla_output_shape, std::vector<XlaOutputDescription>* outputs) {
std::vector<XlaCompiler::OutputDescription>* outputs) {
auto shape_representation_fn_no_fast_memory = auto shape_representation_fn_no_fast_memory =
[shape_representation_fn](const TensorShape& shape, DataType dtype) { [shape_representation_fn](const TensorShape& shape, DataType dtype) {
return shape_representation_fn(shape, dtype, /*use_fast_memory=*/false); return shape_representation_fn(shape, dtype, /*use_fast_memory=*/false);
@ -161,7 +160,7 @@ Status GetOutputInfo(
// Construct OutputDescription for result. // Construct OutputDescription for result.
outputs->emplace_back(); outputs->emplace_back();
XlaCompiler::OutputDescription& out_desc = outputs->back(); XlaOutputDescription& out_desc = outputs->back();
TF_RETURN_IF_ERROR(ConvertToDataType(tensor_type, &out_desc.type)); TF_RETURN_IF_ERROR(ConvertToDataType(tensor_type, &out_desc.type));
// TODO(ycao): Support constant output. // TODO(ycao): Support constant output.
out_desc.is_constant = false; out_desc.is_constant = false;
@ -185,7 +184,7 @@ Status GetOutputInfo(
// TODO(ycao): Implement logic to compute resource updates when we need to // TODO(ycao): Implement logic to compute resource updates when we need to
// support graphs with resource updates in MLIR-based TF compiler bridge. // support graphs with resource updates in MLIR-based TF compiler bridge.
void GetResourceUpdatesForMlir( void GetResourceUpdatesForMlir(
std::vector<XlaCompiler::ResourceUpdate>* resource_updates) { std::vector<XlaResourceUpdate>* resource_updates) {
resource_updates->clear(); resource_updates->clear();
} }
@ -265,7 +264,7 @@ Status ConvertMLIRToXlaComputation(
mlir::ModuleOp module_op, llvm::StringRef device_type, mlir::ModuleOp module_op, llvm::StringRef device_type,
xla::XlaComputation* xla_computation, bool use_tuple_args, xla::XlaComputation* xla_computation, bool use_tuple_args,
bool return_tuple, bool return_tuple,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) { std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) {
mlir::PassManager tf2xla(module_op.getContext()); mlir::PassManager tf2xla(module_op.getContext());
tf2xla.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass()); tf2xla.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
@ -341,8 +340,8 @@ Status ConvertMLIRToXlaComputation(
static Status CompileMlirToXlaHlo( static Status CompileMlirToXlaHlo(
mlir::ModuleOp module_op, llvm::ArrayRef<TensorShape> arg_shapes, mlir::ModuleOp module_op, llvm::ArrayRef<TensorShape> arg_shapes,
llvm::StringRef device_type, bool use_tuple_args, llvm::StringRef device_type, bool use_tuple_args,
XlaCompiler::ShapeRepresentationFn shape_representation_fn, XlaHelpers::ShapeRepresentationFn shape_representation_fn,
XlaCompiler::CompilationResult* compilation_result, XlaCompilationResult* compilation_result,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) { std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) {
if (VLOG_IS_ON(1)) if (VLOG_IS_ON(1))
tensorflow::DumpMlirOpToFile("mlir_compile_before", module_op); tensorflow::DumpMlirOpToFile("mlir_compile_before", module_op);
@ -391,8 +390,8 @@ static Status CompileMlirToXlaHlo(
Status CompileSerializedMlirToXlaHlo( Status CompileSerializedMlirToXlaHlo(
llvm::StringRef mlir_module_string, llvm::ArrayRef<TensorShape> arg_shapes, llvm::StringRef mlir_module_string, llvm::ArrayRef<TensorShape> arg_shapes,
llvm::StringRef device_type, bool use_tuple_args, llvm::StringRef device_type, bool use_tuple_args,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
XlaCompiler::CompilationResult* compilation_result, XlaCompilationResult* compilation_result,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) { std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) {
RegisterDialects(); RegisterDialects();
mlir::MLIRContext mlir_context; mlir::MLIRContext mlir_context;
@ -411,16 +410,16 @@ Status CompileSerializedMlirToXlaHlo(
// removed from the signature. // removed from the signature.
// Returns the original indices for the other arguments on success. // Returns the original indices for the other arguments on success.
static StatusOr<std::vector<int>> RewriteWithArgs( static StatusOr<std::vector<int>> RewriteWithArgs(
mlir::ModuleOp module, llvm::ArrayRef<const XlaCompiler::Argument> args) { mlir::ModuleOp module, llvm::ArrayRef<const XlaArgument> args) {
mlir::FuncOp main_fn = module.lookupSymbol<mlir::FuncOp>("main"); mlir::FuncOp main_fn = module.lookupSymbol<mlir::FuncOp>("main");
std::vector<int> params; std::vector<int> params;
auto builder = mlir::OpBuilder(main_fn.getBody()); auto builder = mlir::OpBuilder(main_fn.getBody());
std::vector<int> args_to_erase; std::vector<int> args_to_erase;
for (int idx = 0; idx < args.size(); idx++) { for (int idx = 0; idx < args.size(); idx++) {
const XlaCompiler::Argument& xla_arg = args[idx]; const XlaArgument& xla_arg = args[idx];
mlir::BlockArgument mlir_arg = main_fn.getArgument(idx); mlir::BlockArgument mlir_arg = main_fn.getArgument(idx);
if (xla_arg.kind != XlaCompiler::Argument::kConstant) { if (xla_arg.kind != XlaArgument::kConstant) {
params.push_back(idx); params.push_back(idx);
continue; continue;
} }
@ -439,11 +438,11 @@ static StatusOr<std::vector<int>> RewriteWithArgs(
} }
Status CompileGraphToXlaHlo( Status CompileGraphToXlaHlo(
const Graph& graph, llvm::ArrayRef<const XlaCompiler::Argument> args, const Graph& graph, llvm::ArrayRef<const XlaArgument> args,
llvm::StringRef device_type, bool use_tuple_args, llvm::StringRef device_type, bool use_tuple_args,
const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info, const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
XlaCompiler::CompilationResult* compilation_result, XlaCompilationResult* compilation_result,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) { std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes) {
RegisterDialects(); RegisterDialects();

View File

@ -20,7 +20,10 @@ limitations under the License.
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringRef.h"
#include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project
#include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project
#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_argument.h"
#include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/xla/client/xla_computation.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/core/protobuf/graph_debug_info.pb.h"
#include "tensorflow/stream_executor/lib/statusor.h" #include "tensorflow/stream_executor/lib/statusor.h"
@ -57,7 +60,7 @@ Status ConvertMLIRToXlaComputation(
mlir::ModuleOp module_op, llvm::StringRef device_type, mlir::ModuleOp module_op, llvm::StringRef device_type,
xla::XlaComputation* xla_computation, bool use_tuple_args, xla::XlaComputation* xla_computation, bool use_tuple_args,
bool return_tuple, bool return_tuple,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn = nullptr, const XlaHelpers::ShapeRepresentationFn shape_representation_fn = nullptr,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes = {}); std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes = {});
// Compiles a serialized MLIR module into XLA HLO, generates all accompanying // Compiles a serialized MLIR module into XLA HLO, generates all accompanying
@ -65,17 +68,17 @@ Status ConvertMLIRToXlaComputation(
Status CompileSerializedMlirToXlaHlo( Status CompileSerializedMlirToXlaHlo(
llvm::StringRef mlir_module_string, llvm::ArrayRef<TensorShape> arg_shapes, llvm::StringRef mlir_module_string, llvm::ArrayRef<TensorShape> arg_shapes,
llvm::StringRef device_type, bool use_tuple_args, llvm::StringRef device_type, bool use_tuple_args,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
XlaCompiler::CompilationResult* compilation_result, XlaCompilationResult* compilation_result,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes = {}); std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes = {});
// Same as the above but takes input as TensorFlow Graph. // Same as the above but takes input as TensorFlow Graph.
Status CompileGraphToXlaHlo( Status CompileGraphToXlaHlo(
const Graph& graph, llvm::ArrayRef<const XlaCompiler::Argument> args, const Graph& graph, llvm::ArrayRef<const XlaArgument> args,
llvm::StringRef device_type, bool use_tuple_args, llvm::StringRef device_type, bool use_tuple_args,
const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info, const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info,
const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const XlaHelpers::ShapeRepresentationFn shape_representation_fn,
XlaCompiler::CompilationResult* compilation_result, XlaCompilationResult* compilation_result,
std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes = {}); std::vector<std::unique_ptr<mlir::Pass>> custom_legalization_passes = {});
} // namespace tensorflow } // namespace tensorflow

View File

@ -92,7 +92,11 @@ cc_library(
"//tensorflow/compiler/mlir/tensorflow:export_tf_dialect_op", "//tensorflow/compiler/mlir/tensorflow:export_tf_dialect_op",
"//tensorflow/compiler/mlir/tensorflow:lower_tf_lib", "//tensorflow/compiler/mlir/tensorflow:lower_tf_lib",
"//tensorflow/compiler/mlir/tensorflow:translate_utils", "//tensorflow/compiler/mlir/tensorflow:translate_utils",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compilation_device",
"//tensorflow/compiler/tf2xla:xla_context",
"//tensorflow/compiler/tf2xla:xla_expression",
"//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/core:core_cpu_lib", "//tensorflow/core:core_cpu_lib",
"//tensorflow/core:framework", "//tensorflow/core:framework",
@ -125,8 +129,10 @@ cc_library(
"//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo",
"//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration",
"//tensorflow/compiler/mlir/hlo:lhlo", "//tensorflow/compiler/mlir/hlo:lhlo",
"//tensorflow/compiler/xla:debug_options_flags",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla/service:backend",
"//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:buffer_assignment",
"//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo",
"//tensorflow/compiler/xla/service:hlo_casting_utils", "//tensorflow/compiler/xla/service:hlo_casting_utils",
@ -228,7 +234,7 @@ cc_library(
"//tensorflow/compiler/mlir/tensorflow:convert_type", "//tensorflow/compiler/mlir/tensorflow:convert_type",
"//tensorflow/compiler/mlir/tensorflow:error_util", "//tensorflow/compiler/mlir/tensorflow:error_util",
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:comparison_util",
"//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:literal_util",
"//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:shape_util",

View File

@ -43,7 +43,6 @@ limitations under the License.
#include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h"
#include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/mlir/xla/type_to_shape.h"
#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h"
#include "tensorflow/compiler/tf2xla/xla_compiler.h"
#include "tensorflow/compiler/xla/client/lib/matrix.h" #include "tensorflow/compiler/xla/client/lib/matrix.h"
#include "tensorflow/compiler/xla/client/lib/quantize.h" #include "tensorflow/compiler/xla/client/lib/quantize.h"
#include "tensorflow/compiler/xla/client/lib/slicing.h" #include "tensorflow/compiler/xla/client/lib/slicing.h"
@ -463,7 +462,7 @@ class ConvertToHloModule {
// single value. // single value.
explicit ConvertToHloModule( explicit ConvertToHloModule(
mlir::ModuleOp module, bool use_tuple_args, bool return_tuple, mlir::ModuleOp module, bool use_tuple_args, bool return_tuple,
tensorflow::XlaCompiler::ShapeRepresentationFn shape_representation_fn) tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn)
: module_(module), : module_(module),
module_builder_("main"), module_builder_("main"),
use_tuple_args_(use_tuple_args), use_tuple_args_(use_tuple_args),
@ -545,7 +544,7 @@ class ConvertToHloModule {
// Shape representation function to determine entry function argument and // Shape representation function to determine entry function argument and
// result shapes. // result shapes.
tensorflow::XlaCompiler::ShapeRepresentationFn shape_representation_fn_; tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn_;
// Unique suffix to give to the name of the next lowered region. // Unique suffix to give to the name of the next lowered region.
size_t region_id_ = 0; size_t region_id_ = 0;
@ -1500,7 +1499,7 @@ LogicalResult AddDynamicParameterBindings(mlir::ModuleOp module,
Status ConvertMlirHloToHlo(mlir::ModuleOp module, xla::HloProto* hlo_proto, Status ConvertMlirHloToHlo(mlir::ModuleOp module, xla::HloProto* hlo_proto,
bool use_tuple_args, bool return_tuple, bool use_tuple_args, bool return_tuple,
const tensorflow::XlaCompiler::ShapeRepresentationFn const tensorflow::XlaHelpers::ShapeRepresentationFn
shape_representation_fn) { shape_representation_fn) {
mlir::StatusScopedDiagnosticHandler diag_handler(module.getContext()); mlir::StatusScopedDiagnosticHandler diag_handler(module.getContext());
ConvertToHloModule converter(module, use_tuple_args, return_tuple, ConvertToHloModule converter(module, use_tuple_args, return_tuple,

View File

@ -18,9 +18,10 @@ limitations under the License.
#include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project
#include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h"
#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/core/framework/tensor_shape.h"
namespace mlir { namespace mlir {
@ -33,7 +34,7 @@ namespace mlir {
// single value. // single value.
Status ConvertMlirHloToHlo(mlir::ModuleOp module, ::xla::HloProto* hlo_proto, Status ConvertMlirHloToHlo(mlir::ModuleOp module, ::xla::HloProto* hlo_proto,
bool use_tuple_args, bool return_tuple, bool use_tuple_args, bool return_tuple,
const tensorflow::XlaCompiler::ShapeRepresentationFn const tensorflow::XlaHelpers::ShapeRepresentationFn
shape_representation_fn = nullptr); shape_representation_fn = nullptr);
// Creates XlaOp equivalent of a given MLIR operation using the operand info // Creates XlaOp equivalent of a given MLIR operation using the operand info

View File

@ -48,7 +48,8 @@ limitations under the License.
#include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_compilation_device.h"
#include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_context.h"
#include "tensorflow/compiler/tf2xla/xla_expression.h" #include "tensorflow/compiler/tf2xla/xla_expression.h"
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_factory.h"
@ -410,7 +411,7 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() {
device_->GetAllocator(tensorflow::AllocatorAttributes()), expr.dtype(), device_->GetAllocator(tensorflow::AllocatorAttributes()), expr.dtype(),
shape_or.ValueOrDie()); shape_or.ValueOrDie());
tensorflow::Tensor& tensor = tensors.back(); tensorflow::Tensor& tensor = tensors.back();
tensorflow::XlaOpKernelContext::AssignExpressionToTensor(expr, &tensor); tensorflow::XlaExpression::AssignExpressionToTensor(expr, &tensor);
inputs.emplace_back(&tensor); inputs.emplace_back(&tensor);
} }
@ -438,7 +439,7 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() {
for (int i = 0, e = op_->getNumResults(); i < e; i++) { for (int i = 0, e = op_->getNumResults(); i < e; i++) {
tensorflow::Tensor* output = op_context.mutable_output(i); tensorflow::Tensor* output = op_context.mutable_output(i);
const tensorflow::XlaExpression* expr = const tensorflow::XlaExpression* expr =
tensorflow::XlaOpKernelContext::CastExpressionFromTensor(*output); tensorflow::XlaExpression::CastExpressionFromTensor(*output);
if (expr->kind() != tensorflow::XlaExpression::Kind::kXlaOp) if (expr->kind() != tensorflow::XlaExpression::Kind::kXlaOp)
return op_->emitError( return op_->emitError(
"expects XlaExpression of kind kXlaOp in compiled output"); "expects XlaExpression of kind kXlaOp in compiled output");

View File

@ -37,6 +37,8 @@ limitations under the License.
#include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h"
#include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/hlo_utils.h"
#include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h"
#include "tensorflow/compiler/xla/debug_options_flags.h"
#include "tensorflow/compiler/xla/service/backend.h"
#include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h"
#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
#include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_computation.h"

View File

@ -50,6 +50,7 @@ cc_library(
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
":xla_compiler", ":xla_compiler",
":xla_op_registry",
"//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops",
"//tensorflow/core:framework", "//tensorflow/core:framework",
"//tensorflow/core:framework_internal", "//tensorflow/core:framework_internal",
@ -145,6 +146,7 @@ cc_library(
":tf2xla_proto_cc", ":tf2xla_proto_cc",
":tf2xla_util", ":tf2xla_util",
":xla_compiler", ":xla_compiler",
":xla_op_registry",
"//tensorflow/compiler/aot:aot_only_var_handle_op", "//tensorflow/compiler/aot:aot_only_var_handle_op",
"//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops",
"//tensorflow/compiler/xla/client", "//tensorflow/compiler/xla/client",
@ -316,14 +318,8 @@ cc_library(
srcs = [ srcs = [
"const_analysis.cc", "const_analysis.cc",
"graph_compiler.cc", "graph_compiler.cc",
"xla_compilation_device.cc",
"xla_compiler.cc", "xla_compiler.cc",
"xla_context.cc",
"xla_expression.cc",
"xla_helpers.cc",
"xla_op_kernel.cc", "xla_op_kernel.cc",
"xla_op_registry.cc",
"xla_resource.cc",
"xla_cpu_backend.cc", "xla_cpu_backend.cc",
] + if_cuda_is_configured([ ] + if_cuda_is_configured([
"xla_gpu_backend.cc", "xla_gpu_backend.cc",
@ -333,14 +329,10 @@ cc_library(
hdrs = [ hdrs = [
"const_analysis.h", "const_analysis.h",
"graph_compiler.h", "graph_compiler.h",
"xla_compilation_device.h",
"xla_compiler.h", "xla_compiler.h",
"xla_context.h",
"xla_expression.h",
"xla_helpers.h", "xla_helpers.h",
"xla_op_kernel.h", "xla_op_kernel.h",
"xla_op_registry.h", "xla_op_registry.h",
"xla_resource.h",
], ],
visibility = [":friends"], visibility = [":friends"],
deps = [ deps = [
@ -351,10 +343,18 @@ cc_library(
":sharding_util", ":sharding_util",
":side_effect_util", ":side_effect_util",
":tf2xla_util", ":tf2xla_util",
":xla_argument",
":xla_compilation_device",
":xla_context",
":xla_expression",
":xla_helpers",
":xla_op_registry",
":xla_resource",
"//tensorflow/compiler/jit:common", "//tensorflow/compiler/jit:common",
"//tensorflow/compiler/jit:flags", "//tensorflow/compiler/jit:flags",
"//tensorflow/compiler/jit:shape_inference", "//tensorflow/compiler/jit:shape_inference",
"//tensorflow/compiler/jit:xla_cluster_util", "//tensorflow/compiler/jit:xla_cluster_util",
"//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_no_tf_dialect_passes",
"//tensorflow/compiler/tf2xla/lib:util", "//tensorflow/compiler/tf2xla/lib:util",
"//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal",
"//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:shape_util",
@ -370,6 +370,7 @@ cc_library(
"//tensorflow/compiler/xla/client:xla_computation", "//tensorflow/compiler/xla/client:xla_computation",
"//tensorflow/compiler/xla/client/lib:arithmetic", "//tensorflow/compiler/xla/client/lib:arithmetic",
"//tensorflow/compiler/xla/client/lib:constants", "//tensorflow/compiler/xla/client/lib:constants",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu",
"//tensorflow/core:core_cpu_internal", "//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework", "//tensorflow/core:framework",
@ -388,6 +389,172 @@ cc_library(
alwayslink = 1, alwayslink = 1,
) )
cc_library(
name = "xla_compilation_device",
srcs = [
"xla_compilation_device.cc",
],
hdrs = [
"xla_compilation_device.h",
],
deps = [
":common",
":frontend_attributes_util",
":sharding_util",
":xla_context",
":xla_helpers",
"//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:session_options",
"//tensorflow/core/common_runtime:core_cpu_internal",
],
alwayslink = 1,
)
cc_library(
name = "xla_context",
srcs = [
"xla_context.cc",
],
hdrs = [
"xla_context.h",
],
deps = [
":common",
":xla_expression",
":xla_helpers",
"//tensorflow/compiler/xla:literal",
"//tensorflow/compiler/xla:shape_util",
"//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla:xla_data_proto_cc",
"//tensorflow/compiler/xla/client:client_library",
"//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/compiler/xla/client:xla_computation",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core/common_runtime:core_cpu_internal",
"@com_google_absl//absl/types:span",
],
alwayslink = 1,
)
cc_library(
name = "xla_op_registry",
srcs = [
"xla_op_registry.cc",
],
hdrs = [
"xla_op_registry.h",
],
visibility = [":friends"],
deps = [
":common",
":xla_context",
"//tensorflow/compiler/jit:flags",
"//tensorflow/compiler/jit:xla_cluster_util",
"//tensorflow/compiler/xla/client:client_library",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:session_options",
"//tensorflow/core:stream_executor_no_cuda",
"//tensorflow/core/common_runtime:core_cpu_internal",
],
alwayslink = 1,
)
cc_library(
name = "xla_expression",
srcs = [
"xla_expression.cc",
],
hdrs = [
"xla_expression.h",
],
deps = [
":common",
":xla_resource",
"//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla/client",
"//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"@com_google_absl//absl/types:optional",
],
alwayslink = 1,
)
cc_library(
name = "xla_resource",
srcs = [
"xla_resource.cc",
],
hdrs = [
"xla_resource.h",
],
deps = [
":common",
":sharding_util",
":xla_helpers",
"//tensorflow/compiler/xla:xla_data_proto_cc",
"//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
)
cc_library(
name = "xla_helpers",
srcs = [
"xla_helpers.cc",
],
hdrs = [
"xla_helpers.h",
],
visibility = [":friends"],
deps = [
":common",
":host_compute_metadata_proto_cc",
"//tensorflow/compiler/tf2xla/lib:util",
"//tensorflow/compiler/xla:types",
"//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/compiler/xla/client:xla_computation",
"//tensorflow/compiler/xla/client/lib:arithmetic",
"//tensorflow/compiler/xla/client/lib:constants",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"@com_google_absl//absl/types:span",
],
alwayslink = 1,
)
cc_library(
name = "xla_argument",
srcs = [
"xla_argument.cc",
],
hdrs = [
"xla_argument.h",
],
deps = [
":host_compute_metadata_proto_cc",
":xla_resource",
"//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/compiler/xla/service:hlo",
"//tensorflow/core:framework",
"@com_google_absl//absl/types:span",
],
alwayslink = 1,
)
cc_library( cc_library(
name = "common", name = "common",
srcs = [ srcs = [
@ -564,6 +731,8 @@ tf_cc_test(
":common", ":common",
":side_effect_util", ":side_effect_util",
":xla_compiler", ":xla_compiler",
":xla_expression",
":xla_resource",
"//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops",
"//tensorflow/cc:function_ops", "//tensorflow/cc:function_ops",
"//tensorflow/cc:functional_ops", "//tensorflow/cc:functional_ops",

View File

@ -145,7 +145,12 @@ tf_kernel_library(
"//tensorflow/compiler/jit:xla_activity_listener", "//tensorflow/compiler/jit:xla_activity_listener",
"//tensorflow/compiler/jit:xla_activity_proto_cc", "//tensorflow/compiler/jit:xla_activity_proto_cc",
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compilation_device",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_context",
"//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla:xla_resource",
"//tensorflow/compiler/tf2xla/lib:broadcast", "//tensorflow/compiler/tf2xla/lib:broadcast",
"//tensorflow/compiler/tf2xla/lib:data_format", "//tensorflow/compiler/tf2xla/lib:data_format",
"//tensorflow/compiler/tf2xla/lib:random", "//tensorflow/compiler/tf2xla/lib:random",
@ -223,6 +228,8 @@ cc_library(
deps = [ deps = [
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:literal_util",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:util",
@ -276,6 +283,8 @@ tf_kernel_library(
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:side_effect_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/tf2xla/ops:xla_ops",
"//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal",
"//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:status_macros",
@ -296,6 +305,8 @@ tf_kernel_library(
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:side_effect_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_context",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/tf2xla/ops:xla_ops",
"//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal",
"//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_builder",
@ -314,6 +325,8 @@ tf_kernel_library(
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:side_effect_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_context",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/tf2xla/ops:xla_ops",
"//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal",
"//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_builder",
@ -333,6 +346,7 @@ tf_kernel_library(
], ],
deps = [ deps = [
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/core:array_ops_op_lib", "//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:framework", "//tensorflow/core:framework",
"//tensorflow/core:lib", "//tensorflow/core:lib",

View File

@ -38,6 +38,7 @@ cc_library(
hdrs = ["random.h"], hdrs = ["random.h"],
deps = [ deps = [
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_helpers",
"//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:status_macros",
"//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:statusor",
"//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_builder",

View File

@@ -0,0 +1,53 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/tf2xla/xla_argument.h"
namespace tensorflow {
bool XlaArgument::operator==(const XlaArgument& other) const {
if (std::tie(kind, resource_kind, type, name, initialized, max_array_size,
tensor_array_gradients) !=
std::tie(other.kind, other.resource_kind, other.type, other.name,
other.initialized, other.max_array_size,
other.tensor_array_gradients)) {
return false;
}
if (absl::holds_alternative<xla::Shape>(shape)) {
if (!absl::holds_alternative<xla::Shape>(other.shape)) {
return false;
}
if (!xla::Shape::Equal()(absl::get<xla::Shape>(shape),
absl::get<xla::Shape>(other.shape))) {
return false;
}
} else {
if (!absl::holds_alternative<TensorShape>(other.shape)) {
return false;
}
if (absl::get<TensorShape>(shape) != absl::get<TensorShape>(other.shape)) {
return false;
}
}
if (constant_value.shape() != other.constant_value.shape()) {
return false;
}
if (is_same_data_across_replicas != other.is_same_data_across_replicas) {
return false;
}
return constant_value.tensor_data() == other.constant_value.tensor_data();
}
} // end namespace tensorflow

View File

@ -0,0 +1,121 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_
#define TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_
#include "absl/types/span.h"
#include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h"
#include "tensorflow/compiler/tf2xla/xla_resource.h"
#include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/service/hlo_sharding.h"
#include "tensorflow/core/framework/tensor.h"
namespace tensorflow {
// Describes how to derive the value of each _Arg node in the graph/function
// being compiled. There must be one Argument for each _Arg index.
struct XlaArgument {
enum Kind {
// Default value; not a valid kind.
kInvalid,
// Argument is a compile-time constant. No associated runtime parameter.
kConstant,
// Argument is a Variable, TensorArray, or Stack resource. Has an
// associated runtime parameter iff `initialized` is true.
kResource,
// Argument is a run-time parameter.
kParameter,
// Argument is an XLA token.
kToken,
// Argument is a TensorList.
kTensorList,
};
Kind kind = kInvalid;
// The type of the argument. If the argument is a resource, this
// is the type of the variable's value, not DT_RESOURCE.
DataType type = DT_INVALID;
// The shape of the argument. For:
// * a parameter: the shape of the parameter. We allow setting the xla shape
// if known. This helps avoid conversions to and from TensorShape.
// * a constant: ignored; the shape given by constant_value is used
// instead.
// * an uninitialized resource: ignored. We don't yet know the shape of an
// uninitialized resource (otherwise we would have initialized it!)
// * an initialized variable: the shape of the variable's value.
// * an initialized TensorArray or Stack resource: the shape of an entry in
// the TensorArray/Stack. Note this is the size of a single entry, not the
// XLA data structure that represents the complete stack/array.
absl::variant<TensorShape, xla::Shape> shape;
// The value of the argument, if it is a compile-time constant. Must be a
// host-memory tensor.
Tensor constant_value;
// The name of this argument, used for debugging.
string name;
// The name of TensorFlow _Arg node, used for debugging.
string node_name;
// For a kResource, what kind of resource is it?
XlaResource::Kind resource_kind = XlaResource::kInvalid;
// For a kResource, has this resource been initialized?
bool initialized = false;
// For a kResource, is this resource on Fast Memory.
bool fast_mem = false;
// For a TensorArray or Stack resource, what is the array's declared size?
// (Used for lazy initialization.)
int64 max_array_size = -1;
// TensorArray resource parameters are passed as (array, gradient array 0,
// ..., gradient array k), where the gradient arrays are in the same order
// as `tensor_array_gradients`.
std::set<string> tensor_array_gradients;
// dynamic dims to arg number map. Empty if no dynamic shapes.
std::map<int32, int32> dynamic_dim_to_arg_num_map;
bool is_pad_arg = false;
// Whether this argument will receive the same data across all replicas.
bool is_same_data_across_replicas = false;
bool operator==(const XlaArgument& other) const;
// Returns a human-readable summary of the argument.
string HumanString() const;
// Returns the dimension sizes for either TensorShape or xla::Shape.
std::vector<int64> DimensionSizes() const;
absl::InlinedVector<int64, 4> DimensionSizesAsInlinedVector() const;
// Returns the human-readable string for either TensorShape or xla::Shape.
string ShapeHumanString() const;
};
} // end namespace tensorflow
#endif // TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_

View File

@ -422,39 +422,6 @@ Status BuildComputation(
} // namespace } // namespace
bool XlaCompiler::Argument::operator==(
const XlaCompiler::Argument& other) const {
if (std::tie(kind, resource_kind, type, name, initialized, max_array_size,
tensor_array_gradients) !=
std::tie(other.kind, other.resource_kind, other.type, other.name,
other.initialized, other.max_array_size,
other.tensor_array_gradients)) {
return false;
}
if (absl::holds_alternative<xla::Shape>(shape)) {
if (!absl::holds_alternative<xla::Shape>(other.shape)) {
return false;
}
if (!xla::Shape::Equal()(absl::get<xla::Shape>(shape),
absl::get<xla::Shape>(other.shape))) {
return false;
}
} else {
if (!absl::holds_alternative<TensorShape>(other.shape)) {
return false;
}
if (absl::get<TensorShape>(shape) != absl::get<TensorShape>(other.shape)) {
return false;
}
}
if (constant_value.shape() != other.constant_value.shape()) {
return false;
}
if (is_same_data_across_replicas != other.is_same_data_across_replicas) {
return false;
}
return constant_value.tensor_data() == other.constant_value.tensor_data();
}
string XlaCompiler::Argument::HumanString() const { string XlaCompiler::Argument::HumanString() const {
string common; string common;
@ -1494,93 +1461,4 @@ xla::StatusOr<xla::XlaOp> XlaCompiler::GetNodeToken(const string& node_name) {
return iter->second; return iter->second;
} }
XlaCompiler::ShapeRepresentationFn IdentityShapeRepresentationFn() {
return [](const TensorShape& shape, DataType dtype,
bool use_fast_memory) -> xla::StatusOr<xla::Shape> {
xla::Shape xla_shape;
TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dtype, shape, &xla_shape));
return xla_shape;
};
}
// Rewrites the layout of xla_shape if there is tiled sharding.
Status RewriteLayoutWithShardedShape(
const absl::optional<xla::HloSharding>& sharding, bool use_fast_memory,
XlaCompiler::ShapeRepresentationFn shape_representation_fn,
xla::Shape* xla_shape) {
if (sharding && !sharding->IsTileMaximal()) {
// After sharding, per core shape might have different layout. For example,
// before sharding, a shape [128, 128] will be assigned default
// minor-to-major {1, 0}. But after we shard this shape to [128, 64] * 2,
// the sharded shapes will have minor-to-major {0, 1}.
//
// As a result, for sharded shapes, we set their layout to per core shape's
// layout.
//
// TODO(endlessroad): for variable input & update, we might have
// different layouts which will prevent input output aliasing and
// increase memory usage. Investigate such cases.
int64 device = *sharding->tile_assignment().begin();
std::vector<int64> offset =
sharding->TileOffsetForDevice(*xla_shape, device);
std::vector<int64> limit = sharding->TileLimitForDevice(*xla_shape, device);
std::vector<int64> dimensions(xla_shape->rank());
for (int64 i = 0; i < xla_shape->rank(); ++i) {
dimensions[i] = limit[i] - offset[i];
}
xla::Shape per_device_xla_shape =
xla::ShapeUtil::MakeShape(xla_shape->element_type(), dimensions);
TensorShape per_device_tensor_shape;
TF_RETURN_IF_ERROR(
XLAShapeToTensorShape(per_device_xla_shape, &per_device_tensor_shape));
TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType(
xla_shape->element_type()));
TF_ASSIGN_OR_RETURN(per_device_xla_shape,
shape_representation_fn(per_device_tensor_shape, dtype,
use_fast_memory));
*xla_shape->mutable_layout() = per_device_xla_shape.layout();
}
return Status::OK();
}
// There is a shape_representation_fn or sharding for an output, this function
// uses a reshape to fix the layout.
xla::StatusOr<xla::XlaOp> ReshapeWithCorrectRepresentationAndSharding(
xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape,
XlaCompiler::ShapeRepresentationFn shape_representation_fn,
absl::optional<xla::OpSharding> sharding, bool fast_mem) {
if (original_shape.IsTuple()) {
std::vector<xla::XlaOp> elements;
for (int64 i = 0; i < original_shape.tuple_shapes_size(); ++i) {
auto subsharding = sharding ? sharding->tuple_shardings(i) : sharding;
TF_ASSIGN_OR_RETURN(auto element,
ReshapeWithCorrectRepresentationAndSharding(
builder, xla::GetTupleElement(original, i),
original_shape.tuple_shapes(i),
shape_representation_fn, subsharding, fast_mem));
elements.push_back(element);
}
return xla::Tuple(builder, elements);
}
if (!original_shape.IsArray()) return original;
TensorShape shape;
TF_RETURN_IF_ERROR(XLAShapeToTensorShape(original_shape, &shape));
TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType(
original_shape.element_type()));
TF_ASSIGN_OR_RETURN(auto to_shape,
shape_representation_fn(shape, dtype, fast_mem));
if (sharding) {
TF_ASSIGN_OR_RETURN(auto hlo_sharding,
xla::HloSharding::FromProto(*sharding));
TF_RETURN_IF_ERROR(RewriteLayoutWithShardedShape(
hlo_sharding, fast_mem, shape_representation_fn, &to_shape));
}
if (xla::ShapeUtil::Compatible(original_shape, to_shape)) {
for (int64 i = 0; i < original_shape.rank(); ++i) {
to_shape.set_dynamic_dimension(i, original_shape.is_dynamic_dimension(i));
}
}
return xla::Reshape(to_shape, original);
}
} // namespace tensorflow } // namespace tensorflow

View File

@ -21,8 +21,10 @@ limitations under the License.
#include "absl/types/span.h" #include "absl/types/span.h"
#include "absl/types/variant.h" #include "absl/types/variant.h"
#include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h"
#include "tensorflow/compiler/tf2xla/xla_argument.h"
#include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_compilation_device.h"
#include "tensorflow/compiler/tf2xla/xla_expression.h" #include "tensorflow/compiler/tf2xla/xla_expression.h"
#include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
#include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/local_client.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
@ -97,96 +99,7 @@ class XlaContext;
// `tensor_array_gradients` ordered set. // `tensor_array_gradients` ordered set.
class XlaCompiler { class XlaCompiler {
public: public:
// Describes how to derive the value of each _Arg node in the graph/function using Argument = ::tensorflow::XlaArgument;
// being compiled. There must be one Argument for each _Arg index.
struct Argument {
enum Kind {
// Default value; not a valid kind.
kInvalid,
// Argument is a compile-time constant. No associated runtime parameter.
kConstant,
// Argument is a Variable, TensorArray, or Stack resource. Has an
// associated runtime parameter iff `initialized` is true.
kResource,
// Argument is a run-time parameter.
kParameter,
// Argument is an XLA token.
kToken,
// Argument is a TensorList.
kTensorList,
};
Kind kind = kInvalid;
// The type of the argument. If the argument is a resource, this
// is the type of the variable's value, not DT_RESOURCE.
DataType type = DT_INVALID;
// The shape of the argument. For:
// * a parameter: the shape of the parameter. We allow setting the xla shape
// if known. This helps avoid conversions to and from TensorShape.
// * a constant: ignored; the shape given by constant_value is used
// instead.
// * an uninitialized resource: ignored. We don't yet know the shape of an
// uninitialized resource (otherwise we would have initialized it!)
// * an initialized variable: the shape of the variable's value.
// * an initialized TensorArray or Stack resource: the shape of an entry in
// the TensorArray/Stack. Note this is the size of a single entry, not the
// XLA data structure that represents the complete stack/array.
absl::variant<TensorShape, xla::Shape> shape;
// The value of the argument, if it is a compile-time constant. Must be a
// host-memory tensor.
Tensor constant_value;
// The name of this argument, used for debugging.
string name;
// The name of TensorFlow _Arg node, used for debugging.
string node_name;
// For a kResource, what kind of resource is it?
XlaResource::Kind resource_kind = XlaResource::kInvalid;
// For a kResource, has this resource been initialized?
bool initialized = false;
// For a kResource, is this resource on Fast Memory.
bool fast_mem = false;
// For a TensorArray or Stack resource, what is the array's declared size?
// (Used for lazy initialization.)
int64 max_array_size = -1;
// TensorArray resource parameters are passed as (array, gradient array 0,
// ..., gradient array k), where the gradient arrays are in the same order
// as `tensor_array_gradients`.
std::set<string> tensor_array_gradients;
// dynamic dims to arg number map. Empty if no dynamic shapes.
std::map<int32, int32> dynamic_dim_to_arg_num_map;
bool is_pad_arg = false;
// Whether this argument will receive the same data across all replicas.
bool is_same_data_across_replicas = false;
bool operator==(const Argument& other) const;
// Returns a human-readable summary of the argument.
string HumanString() const;
// Returns the dimension sizes for either TensorShape or xla::Shape.
std::vector<int64> DimensionSizes() const;
absl::InlinedVector<int64, 4> DimensionSizesAsInlinedVector() const;
// Returns the human-readable string for either TensorShape or xla::Shape.
string ShapeHumanString() const;
};
// Options pertaining to an individual call to CompileGraph() or // Options pertaining to an individual call to CompileGraph() or
// CompileFunction(). // CompileFunction().
@ -221,77 +134,11 @@ class XlaCompiler {
bool alias_resource_update = false; bool alias_resource_update = false;
}; };
struct OutputDescription { using OutputDescription = ::tensorflow::XlaOutputDescription;
// Type and shape of the output. The shape is the unflattened shape.
// When `type` is DT_RESOURCE, `shape` is the shape of the resource
// variable's value.
DataType type;
TensorShape shape;
// Constant output value, if known to be constant at JIT compilation time. using ResourceUpdate = ::tensorflow::XlaResourceUpdate;
// 'Tensor' is in host memory.
bool is_constant = false;
Tensor constant_value;
// When this output is a resource, i.e. `type == DT_RESOURCE`, this is using CompilationResult = ::tensorflow::XlaCompilationResult;
// the index of the input that contains the resource.
int input_index;
// Whether this output is a TensorList.
bool is_tensor_list = false;
};
// Describes a variable write side effect of the computation.
struct ResourceUpdate {
// Index of the input that contains the variable resource to write to.
int input_index;
// Type and shape of the tensor to be written back.
// The `shape` field has the same meaning as the Argument::shape field.
DataType type;
TensorShape shape;
// Was the value of the variable modified by the computation?
// (Always true, unless `return_updated_values_for_all_resources` is true.)
bool modified;
// If the resource is a TensorArray, the set of gradients read or written.
std::set<string> tensor_array_gradients_accessed;
};
struct CompilationResult {
// Vector that maps from the parameters of the XLA computation to their
// original argument positions. To handle compile-time constant inputs, the
// parameters to the XLA computation may be a subset of the original
// arguments. The relative ordering of parameters are maintained.
std::vector<int> input_mapping;
// Input shapes of the computation. If we are flattening inputs, these are
// the flattened shapes.
std::vector<xla::Shape> xla_input_shapes;
// Output shape in XLA format. The output shape is always a tuple. If we
// are flattening outputs, these are the flattened shapes.
xla::Shape xla_output_shape;
// TensorFlow shapes of outputs, together with the values of any
// constant arguments. Vector indexed by Tensorflow _Retval number,
// containing both constant and non-constant results.
std::vector<OutputDescription> outputs;
// TensorFlow shapes and types of sends/recvs from HostCompute Ops to their
// matching RecvAtHost/SendFromHost Ops in the outer graph.
tf2xla::HostComputeMetadata host_compute_metadata;
// Resources whose values were updated by the computation, ordered
// by return value position (which is the same as the order the resources
// were passed as arguments). Resource updates follow the non-constant
// results in the outputs of XLA computation.
std::vector<ResourceUpdate> resource_updates;
// The XLA computation built from the tensorflow subgraph.
std::shared_ptr<xla::XlaComputation> computation;
};
typedef std::function<xla::StatusOr<xla::Shape>(const TensorShape&, DataType, typedef std::function<xla::StatusOr<xla::Shape>(const TensorShape&, DataType,
bool)> bool)>
@ -518,21 +365,6 @@ class XlaCompiler {
TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler); TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler);
}; };
// Creates an identity shape representation function.
XlaCompiler::ShapeRepresentationFn IdentityShapeRepresentationFn();
// Rewrites the layout of xla_shape if there is tiled sharding.
Status RewriteLayoutWithShardedShape(
const absl::optional<xla::HloSharding>& sharding, bool use_fast_memory,
XlaCompiler::ShapeRepresentationFn shape_representation_fn,
xla::Shape* xla_shape);
// Adds reshapes to fix the layout of an output, if a shape_representation_fn or
// sharding is present.
xla::StatusOr<xla::XlaOp> ReshapeWithCorrectRepresentationAndSharding(
xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape,
XlaCompiler::ShapeRepresentationFn shape_representation_fn,
absl::optional<xla::OpSharding> sharding, bool fast_mem);
} // namespace tensorflow } // namespace tensorflow

View File

@ -24,7 +24,6 @@ limitations under the License.
#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h"
#include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/type_util.h"
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
#include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/compiler/xla/client/client_library.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/client/xla_computation.h"

View File

@ -20,7 +20,6 @@ limitations under the License.
#include <vector> #include <vector>
#include "tensorflow/compiler/tf2xla/xla_compiler.h"
#include "tensorflow/compiler/tf2xla/xla_expression.h" #include "tensorflow/compiler/tf2xla/xla_expression.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/client/xla_computation.h"
@ -33,6 +32,7 @@ limitations under the License.
namespace tensorflow { namespace tensorflow {
class XlaOpKernelContext; class XlaOpKernelContext;
class XlaCompiler;
// The XlaContext is the data structure that holds the state of an XLA // The XlaContext is the data structure that holds the state of an XLA
// compilation, that is accessible from OpKernelContexts when compiling a // compilation, that is accessible from OpKernelContexts when compiling a

View File

@ -163,4 +163,23 @@ xla::StatusOr<TensorShape> XlaExpression::GetShape() const {
} }
} }
const XlaExpression* XlaExpression::CastExpressionFromTensor(
const Tensor& tensor) {
const XlaExpression* expression =
reinterpret_cast<const XlaExpression*>(tensor.tensor_data().data());
CHECK(expression->kind() != XlaExpression::Kind::kInvalid)
<< expression->HumanString();
return expression;
}
// Assigns an XlaExpression to a tensor on an XLA compilation device.
void XlaExpression::AssignExpressionToTensor(const XlaExpression& value,
Tensor* tensor) {
const XlaExpression* expression =
reinterpret_cast<const XlaExpression*>(tensor->tensor_data().data());
CHECK(expression->kind() == XlaExpression::Kind::kInvalid)
<< expression->HumanString();
*const_cast<XlaExpression*>(expression) = value;
}
} // namespace tensorflow } // namespace tensorflow

View File

@ -104,6 +104,13 @@ class XlaExpression {
// not the shape of the resource's value. // not the shape of the resource's value.
xla::StatusOr<TensorShape> GetShape() const; xla::StatusOr<TensorShape> GetShape() const;
// Retrieves an XlaExpression that was allocated by a previous Op.
static const XlaExpression* CastExpressionFromTensor(const Tensor& tensor);
// Assigns an XlaExpression to a tensor on an XLA compilation device.
static void AssignExpressionToTensor(const XlaExpression& value,
Tensor* tensor);
private: private:
Kind kind_ = Kind::kInvalid; Kind kind_ = Kind::kInvalid;

View File

@ -22,8 +22,6 @@ limitations under the License.
#include "tensorflow/compiler/tf2xla/literal_util.h" #include "tensorflow/compiler/tf2xla/literal_util.h"
#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h"
#include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/type_util.h"
#include "tensorflow/compiler/tf2xla/xla_context.h"
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
#include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
#include "tensorflow/compiler/xla/client/lib/constants.h" #include "tensorflow/compiler/xla/client/lib/constants.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
@ -128,4 +126,93 @@ xla::XlaOp XlaHelpers::ConvertElementType(const xla::XlaOp& operand,
return xla::ConvertElementType(operand, convert_to); return xla::ConvertElementType(operand, convert_to);
} }
// Returns a ShapeRepresentationFn that maps a TensorFlow shape/dtype to the
// equivalent XLA shape unchanged (no layout or memory-space adjustments).
XlaHelpers::ShapeRepresentationFn IdentityShapeRepresentationFn() {
  return [](const TensorShape& tensor_shape, DataType data_type,
            bool /*use_fast_memory*/) -> xla::StatusOr<xla::Shape> {
    xla::Shape result;
    TF_RETURN_IF_ERROR(TensorShapeToXLAShape(data_type, tensor_shape, &result));
    return result;
  };
}
// Rewrites the layout of *xla_shape if there is tiled (non-replicated)
// sharding.
//
// Args:
//   sharding: optional HLO sharding for the value; only a tiled (not
//     tile-maximal) sharding triggers a rewrite.
//   use_fast_memory: forwarded to `shape_representation_fn`.
//   shape_representation_fn: maps (TensorShape, DataType, use_fast_memory)
//     to the device-representation shape; its layout is adopted here.
//   xla_shape: in/out; on return its layout is replaced by the layout the
//     representation fn chose for the per-core shape. Dimensions unchanged.
Status RewriteLayoutWithShardedShape(
    const absl::optional<xla::HloSharding>& sharding, bool use_fast_memory,
    XlaHelpers::ShapeRepresentationFn shape_representation_fn,
    xla::Shape* xla_shape) {
  if (sharding && !sharding->IsTileMaximal()) {
    // After sharding, per core shape might have different layout. For example,
    // before sharding, a shape [128, 128] will be assigned default
    // minor-to-major {1, 0}. But after we shard this shape to [128, 64] * 2,
    // the sharded shapes will have minor-to-major {0, 1}.
    //
    // As a result, for sharded shapes, we set their layout to per core shape's
    // layout.
    //
    // TODO(endlessroad): for variable input & update, we might have
    // different layouts which will prevent input output aliasing and
    // increase memory usage. Investigate such cases.
    //
    // Use an arbitrary representative device from the tile assignment.
    int64 device = *sharding->tile_assignment().begin();
    std::vector<int64> offset =
        sharding->TileOffsetForDevice(*xla_shape, device);
    std::vector<int64> limit = sharding->TileLimitForDevice(*xla_shape, device);
    // Per-core extent of each dimension is (limit - offset) on that device.
    std::vector<int64> dimensions(xla_shape->rank());
    for (int64 i = 0; i < xla_shape->rank(); ++i) {
      dimensions[i] = limit[i] - offset[i];
    }
    xla::Shape per_device_xla_shape =
        xla::ShapeUtil::MakeShape(xla_shape->element_type(), dimensions);
    TensorShape per_device_tensor_shape;
    TF_RETURN_IF_ERROR(
        XLAShapeToTensorShape(per_device_xla_shape, &per_device_tensor_shape));
    TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType(
                                            xla_shape->element_type()));
    TF_ASSIGN_OR_RETURN(per_device_xla_shape,
                        shape_representation_fn(per_device_tensor_shape, dtype,
                                                use_fast_memory));
    // Adopt only the layout; the full (unsharded) dimensions are kept.
    *xla_shape->mutable_layout() = per_device_xla_shape.layout();
  }
  return Status::OK();
}
// If there is a shape_representation_fn or sharding for an output, this
// function inserts a reshape so the output has the expected representation
// layout. Recurses through tuples element-by-element.
xla::StatusOr<xla::XlaOp> ReshapeWithCorrectRepresentationAndSharding(
    xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape,
    XlaHelpers::ShapeRepresentationFn shape_representation_fn,
    absl::optional<xla::OpSharding> sharding, bool fast_mem) {
  if (original_shape.IsTuple()) {
    // A tuple sharding carries one sub-sharding per element; pass each one
    // down with the matching tuple element.
    std::vector<xla::XlaOp> elements;
    for (int64 i = 0; i < original_shape.tuple_shapes_size(); ++i) {
      auto subsharding = sharding ? sharding->tuple_shardings(i) : sharding;
      TF_ASSIGN_OR_RETURN(auto element,
                          ReshapeWithCorrectRepresentationAndSharding(
                              builder, xla::GetTupleElement(original, i),
                              original_shape.tuple_shapes(i),
                              shape_representation_fn, subsharding, fast_mem));
      elements.push_back(element);
    }
    return xla::Tuple(builder, elements);
  }
  // Non-array values (e.g. tokens) have no layout to fix.
  if (!original_shape.IsArray()) return original;
  TensorShape shape;
  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(original_shape, &shape));
  TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType(
                                          original_shape.element_type()));
  TF_ASSIGN_OR_RETURN(auto to_shape,
                      shape_representation_fn(shape, dtype, fast_mem));
  if (sharding) {
    TF_ASSIGN_OR_RETURN(auto hlo_sharding,
                        xla::HloSharding::FromProto(*sharding));
    TF_RETURN_IF_ERROR(RewriteLayoutWithShardedShape(
        hlo_sharding, fast_mem, shape_representation_fn, &to_shape));
  }
  if (xla::ShapeUtil::Compatible(original_shape, to_shape)) {
    // Same dimensions (only layout differs): carry over dynamic-dimension
    // markers so they are not lost by the reshape.
    for (int64 i = 0; i < original_shape.rank(); ++i) {
      to_shape.set_dynamic_dimension(i, original_shape.is_dynamic_dimension(i));
    }
  }
  return xla::Reshape(to_shape, original);
}
} // end namespace tensorflow } // end namespace tensorflow

View File

@ -19,8 +19,9 @@ limitations under the License.
#define TENSORFLOW_COMPILER_TF2XLA_XLA_HELPERS_H_ #define TENSORFLOW_COMPILER_TF2XLA_XLA_HELPERS_H_
#include "absl/types/span.h" #include "absl/types/span.h"
#include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/service/hlo_sharding.h"
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.h"
namespace tensorflow { namespace tensorflow {
@ -72,6 +73,98 @@ class XlaHelpers {
// than the xla::PrimitiveType. // than the xla::PrimitiveType.
static xla::XlaOp ConvertElementType(const xla::XlaOp& operand, static xla::XlaOp ConvertElementType(const xla::XlaOp& operand,
const DataType new_element_type); const DataType new_element_type);
typedef std::function<xla::StatusOr<xla::Shape>(const TensorShape&, DataType,
bool)>
ShapeRepresentationFn;
};
// Creates an identity shape representation function.
XlaHelpers::ShapeRepresentationFn IdentityShapeRepresentationFn();
// Rewrites the layout of xla_shape if there is tiled sharding.
Status RewriteLayoutWithShardedShape(
const absl::optional<xla::HloSharding>& sharding, bool use_fast_memory,
XlaHelpers::ShapeRepresentationFn shape_representation_fn,
xla::Shape* xla_shape);
// Adds reshapes to fix the layout of an output, if a shape_representation_fn or
// sharding is present.
xla::StatusOr<xla::XlaOp> ReshapeWithCorrectRepresentationAndSharding(
xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape,
XlaHelpers::ShapeRepresentationFn shape_representation_fn,
absl::optional<xla::OpSharding> sharding, bool fast_mem);
// Describes one output of a compiled XLA computation, indexed by the
// TensorFlow _Retval number.
struct XlaOutputDescription {
  // Type and shape of the output. The shape is the unflattened shape.
  // When `type` is DT_RESOURCE, `shape` is the shape of the resource
  // variable's value.
  DataType type;
  TensorShape shape;

  // Constant output value, if known to be constant at JIT compilation time.
  // 'Tensor' is in host memory.
  bool is_constant = false;
  Tensor constant_value;

  // When this output is a resource, i.e. `type == DT_RESOURCE`, this is
  // the index of the input that contains the resource.
  int input_index;

  // Whether this output is a TensorList.
  bool is_tensor_list = false;
};
// Describes a variable write side effect of the computation.
struct XlaResourceUpdate {
  // Index of the input that contains the variable resource to write to.
  int input_index;

  // Type and shape of the tensor to be written back.
  // The `shape` field has the same meaning as the Argument::shape field.
  DataType type;
  TensorShape shape;

  // Was the value of the variable modified by the computation?
  // (Always true, unless `return_updated_values_for_all_resources` is true.)
  bool modified;

  // If the resource is a TensorArray, the set of gradients read or written.
  std::set<string> tensor_array_gradients_accessed;
};
// Holds the result of compiling a TensorFlow subgraph into an XLA
// computation, plus the metadata needed to call it.
struct XlaCompilationResult {
  // Vector that maps from the parameters of the XLA computation to their
  // original argument positions. To handle compile-time constant inputs, the
  // parameters to the XLA computation may be a subset of the original
  // arguments. The relative ordering of parameters are maintained.
  std::vector<int> input_mapping;

  // Input shapes of the computation. If we are flattening inputs, these are
  // the flattened shapes.
  std::vector<xla::Shape> xla_input_shapes;

  // Output shape in XLA format. The output shape is always a tuple. If we
  // are flattening outputs, these are the flattened shapes.
  xla::Shape xla_output_shape;

  // TensorFlow shapes of outputs, together with the values of any
  // constant arguments. Vector indexed by Tensorflow _Retval number,
  // containing both constant and non-constant results.
  std::vector<XlaOutputDescription> outputs;

  // TensorFlow shapes and types of sends/recvs from HostCompute Ops to their
  // matching RecvAtHost/SendFromHost Ops in the outer graph.
  tf2xla::HostComputeMetadata host_compute_metadata;

  // Resources whose values were updated by the computation, ordered
  // by return value position (which is the same as the order the resources
  // were passed as arguments). Resource updates follow the non-constant
  // results in the outputs of XLA computation.
  std::vector<XlaResourceUpdate> resource_updates;

  // The XLA computation built from the tensorflow subgraph.
  std::shared_ptr<xla::XlaComputation> computation;
};
} // end namespace tensorflow } // end namespace tensorflow

View File

@ -49,33 +49,13 @@ XlaCompiler* XlaOpKernelContext::compiler() const {
return xla_context()->compiler(); return xla_context()->compiler();
} }
// Retrieves the XlaExpression that an earlier Op stashed in `tensor`'s
// backing buffer on the XLA compilation device.
const XlaExpression* XlaOpKernelContext::CastExpressionFromTensor(
    const Tensor& tensor) {
  const XlaExpression* expr =
      reinterpret_cast<const XlaExpression*>(tensor.tensor_data().data());
  // The slot must already hold a valid expression; kInvalid means no Op has
  // assigned one yet.
  CHECK(expr->kind() != XlaExpression::Kind::kInvalid) << expr->HumanString();
  return expr;
}
// Assigns an XlaExpression to a tensor on an XLA compilation device.
void XlaOpKernelContext::AssignExpressionToTensor(const XlaExpression& value,
Tensor* tensor) {
const XlaExpression* expression =
reinterpret_cast<const XlaExpression*>(tensor->tensor_data().data());
CHECK(expression->kind() == XlaExpression::Kind::kInvalid)
<< expression->HumanString();
*const_cast<XlaExpression*>(expression) = value;
}
const XlaExpression& XlaOpKernelContext::InputExpression(int index) { const XlaExpression& XlaOpKernelContext::InputExpression(int index) {
return *CastExpressionFromTensor(context_->input(index)); return *XlaExpression::CastExpressionFromTensor(context_->input(index));
} }
const XlaExpression& XlaOpKernelContext::InputExpression( const XlaExpression& XlaOpKernelContext::InputExpression(
absl::string_view name) { absl::string_view name) {
return *CastExpressionFromTensor(GetInputTensorByName(name)); return *XlaExpression::CastExpressionFromTensor(GetInputTensorByName(name));
} }
xla::XlaOp XlaOpKernelContext::Input(int index) { xla::XlaOp XlaOpKernelContext::Input(int index) {
@ -108,7 +88,8 @@ DataType XlaOpKernelContext::input_type(int index) const {
if (type == DT_UINT8) { if (type == DT_UINT8) {
// Masqueraded XlaExpression could have different type. See // Masqueraded XlaExpression could have different type. See
// XlaOpKernelContext::SetOutputExpression for details. // XlaOpKernelContext::SetOutputExpression for details.
auto expression = CastExpressionFromTensor(context_->input(index)); auto expression =
XlaExpression::CastExpressionFromTensor(context_->input(index));
type = expression->dtype(); type = expression->dtype();
} }
return type; return type;
@ -120,7 +101,7 @@ DataType XlaOpKernelContext::InputType(absl::string_view name) {
if (type == DT_UINT8) { if (type == DT_UINT8) {
// Masqueraded XlaExpression could have different type. See // Masqueraded XlaExpression could have different type. See
// XlaOpKernelContext::SetOutputExpression for details. // XlaOpKernelContext::SetOutputExpression for details.
auto expression = CastExpressionFromTensor(tensor); auto expression = XlaExpression::CastExpressionFromTensor(tensor);
type = expression->dtype(); type = expression->dtype();
} }
return type; return type;
@ -385,7 +366,8 @@ Status XlaOpKernelContext::InputList(absl::string_view name,
handles->clear(); handles->clear();
shapes->clear(); shapes->clear();
for (const Tensor& input : inputs) { for (const Tensor& input : inputs) {
handles->push_back(CastExpressionFromTensor(input)->AsXlaOp(builder())); handles->push_back(
XlaExpression::CastExpressionFromTensor(input)->AsXlaOp(builder()));
shapes->push_back(input.shape()); shapes->push_back(input.shape());
} }
return Status::OK(); return Status::OK();
@ -408,7 +390,7 @@ Status ReadVariableInputTensor(const Tensor& tensor, DataType type,
const XlaOpKernelContext* ctx, const XlaOpKernelContext* ctx,
TensorShape* shape, xla::XlaOp* value) { TensorShape* shape, xla::XlaOp* value) {
const XlaExpression* expression = const XlaExpression* expression =
XlaOpKernelContext::CastExpressionFromTensor(tensor); XlaExpression::CastExpressionFromTensor(tensor);
XlaResource* variable = expression->resource(); XlaResource* variable = expression->resource();
TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable != nullptr);
TF_RET_CHECK(variable->kind() == XlaResource::kVariable); TF_RET_CHECK(variable->kind() == XlaResource::kVariable);
@ -461,7 +443,8 @@ Status XlaOpKernelContext::ReadVariableInput(absl::string_view name,
Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type, Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type,
TensorShape* shape) const { TensorShape* shape) const {
const Tensor& tensor = context_->input(index); const Tensor& tensor = context_->input(index);
const XlaExpression* expression = CastExpressionFromTensor(tensor); const XlaExpression* expression =
XlaExpression::CastExpressionFromTensor(tensor);
XlaResource* variable = expression->resource(); XlaResource* variable = expression->resource();
TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable != nullptr);
TF_RET_CHECK(variable->kind() == XlaResource::kVariable); TF_RET_CHECK(variable->kind() == XlaResource::kVariable);
@ -502,8 +485,8 @@ void XlaOpKernelContext::SetOutputExpression(int index,
TF_ASSIGN_OR_RETURN(TensorShape shape, expression.GetShape()); TF_ASSIGN_OR_RETURN(TensorShape shape, expression.GetShape());
TF_RETURN_IF_ERROR(context_->allocate_output(index, shape, &output)); TF_RETURN_IF_ERROR(context_->allocate_output(index, shape, &output));
} }
XlaOpKernelContext::AssignExpressionToTensor( XlaExpression::AssignExpressionToTensor(expression,
expression, context_->mutable_output(index)); context_->mutable_output(index));
return Status::OK(); return Status::OK();
}(); }();
if (!status.ok()) { if (!status.ok()) {
@ -542,7 +525,7 @@ void XlaOpKernelContext::SetResourceOutput(int index, XlaResource* resource) {
Status XlaOpKernelContext::GetResourceInput(int index, XlaResource** resource) { Status XlaOpKernelContext::GetResourceInput(int index, XlaResource** resource) {
const XlaExpression* expression = const XlaExpression* expression =
CastExpressionFromTensor(context_->input(index)); XlaExpression::CastExpressionFromTensor(context_->input(index));
TF_RET_CHECK(expression->resource() != nullptr); TF_RET_CHECK(expression->resource() != nullptr);
*resource = expression->resource(); *resource = expression->resource();
return Status::OK(); return Status::OK();
@ -554,7 +537,7 @@ Status AssignVariableTensor(const Tensor& tensor, DataType type,
const XlaOpKernelContext* ctx, xla::XlaOp handle, const XlaOpKernelContext* ctx, xla::XlaOp handle,
xla::XlaBuilder* builder) { xla::XlaBuilder* builder) {
const XlaExpression* expression = const XlaExpression* expression =
XlaOpKernelContext::CastExpressionFromTensor(tensor); XlaExpression::CastExpressionFromTensor(tensor);
XlaResource* variable = expression->resource(); XlaResource* variable = expression->resource();
TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable != nullptr);
TF_RET_CHECK(variable->kind() == XlaResource::kVariable); TF_RET_CHECK(variable->kind() == XlaResource::kVariable);

View File

@ -17,6 +17,9 @@ limitations under the License.
#define TENSORFLOW_COMPILER_TF2XLA_XLA_OP_KERNEL_H_ #define TENSORFLOW_COMPILER_TF2XLA_XLA_OP_KERNEL_H_
#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h"
#include "tensorflow/compiler/tf2xla/xla_context.h"
#include "tensorflow/compiler/tf2xla/xla_expression.h"
#include "tensorflow/compiler/tf2xla/xla_resource.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/client/xla_computation.h"
#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/compiler/xla/xla_data.pb.h"
@ -284,13 +287,6 @@ class XlaOpKernelContext {
// separate specialization of the computation for each DataType. // separate specialization of the computation for each DataType.
const xla::XlaComputation* GetOrCreateMul(const DataType type); const xla::XlaComputation* GetOrCreateMul(const DataType type);
// Assigns an XlaExpression to a tensor on an XLA compilation device.
static void AssignExpressionToTensor(const XlaExpression& value,
Tensor* tensor);
// Retrieves an XlaExpression that was assigned to the specified tensor.
static const XlaExpression* CastExpressionFromTensor(const Tensor& tensor);
private: private:
// Returns the tensor of input `name`. // Returns the tensor of input `name`.
const Tensor& GetInputTensorByName(absl::string_view name); const Tensor& GetInputTensorByName(absl::string_view name);

View File

@ -21,7 +21,6 @@ limitations under the License.
#include "absl/memory/memory.h" #include "absl/memory/memory.h"
#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h"
#include "tensorflow/compiler/tf2xla/sharding_util.h" #include "tensorflow/compiler/tf2xla/sharding_util.h"
#include "tensorflow/compiler/tf2xla/xla_context.h"
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_builder.h"

View File

@ -57,6 +57,7 @@ cc_library(
":tpu_defs", ":tpu_defs",
":tpu_node_device_util", ":tpu_node_device_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
], ],
alwayslink = 1, alwayslink = 1,
) )
@ -180,8 +181,8 @@ cc_library(
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:tf2xla_util",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/core:core_cpu_internal", "//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:framework_internal", "//tensorflow/core:framework_internal",
"//tensorflow/core:lib", "//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc", "//tensorflow/core:protos_all_cc",

View File

@ -656,6 +656,7 @@ cc_library(
deps = [ deps = [
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:shape_util",
"//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:util",
"//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla:xla_data_proto_cc",
@ -673,6 +674,7 @@ cc_library(
srcs = ["topk_ops.cc"], srcs = ["topk_ops.cc"],
deps = [ deps = [
"//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/tf2xla:xla_op_registry",
"//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_builder",
"//tensorflow/compiler/xla/client/lib:arithmetic", "//tensorflow/compiler/xla/client/lib:arithmetic",
"//tensorflow/core/tpu:tpu_defs", "//tensorflow/core/tpu:tpu_defs",