diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index 19a0e6ef6da..515b58d2f5d 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -358,6 +358,14 @@ class BufferAssignment {
     return allocations_;
   }
 
+  // This is similar to copying Allocations(), except that moving the vector
+  // out preserves the addresses of its elements. BufferAllocation::Slice
+  // keeps a BufferAllocation*, and some backends keep BufferAllocation::Slice
+  // in xla::Executables, so migrating off the use of addresses can be hard.
+  std::vector<BufferAllocation> ReleaseAllocations() {
+    return std::move(allocations_);
+  }
+
   // Returns the total size allocation holding all temporary buffers.
   int64 temp_allocation_total_size() const {
     return temp_allocation_total_size_;
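A note for readers outside the XLA codebase on why `ReleaseAllocations()` moves rather than copies: moving a `std::vector` transfers its heap buffer, so pointers to its elements, such as the `BufferAllocation*` held by `BufferAllocation::Slice`, stay valid. A minimal, self-contained sketch; `Allocation` and `Slice` here are hypothetical stand-ins, not the real XLA classes:

```cpp
#include <cassert>
#include <utility>
#include <vector>

// Simplified stand-ins for BufferAllocation and BufferAllocation::Slice.
struct Allocation { int index; };
struct Slice { const Allocation* allocation; };  // non-owning pointer

int main() {
  std::vector<Allocation> allocations = {{0}, {1}, {2}};
  Slice slice{&allocations[1]};  // points into the vector's heap storage

  // Moving transfers ownership of the same heap buffer, so element addresses
  // (and therefore `slice`) remain valid.
  std::vector<Allocation> released = std::move(allocations);
  assert(slice.allocation == &released[1]);

  // A copy would allocate fresh storage; slices would keep pointing at the
  // source vector, which is the migration hazard the comment describes.
  return 0;
}
```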
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 2b6968162ca..a456b3f026d 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -672,8 +672,8 @@ cc_library(
         "gpu_debug_info_manager.h",
     ],
     deps = [
-        "//tensorflow/compiler/xla/service:buffer_assignment",
         "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_proto_cc",
         "//tensorflow/compiler/xla/service:hlo_proto_util",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
@@ -685,15 +685,10 @@ tf_cc_test(
     srcs = ["gpu_debug_info_manager_test.cc"],
     tags = tf_cuda_tests_tags(),
     deps = [
-        ":gpu_constants",
         ":gpu_debug_info_manager",
-        ":gpu_hlo_schedule",
-        ":stream_assignment",
-        "//tensorflow/compiler/xla/service:buffer_assignment",
+        "//tensorflow/compiler/xla/service:hlo_proto_cc",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:test_utils",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
-        "//tensorflow/core:test",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
index cac335ce087..a89cb435c67 100644
--- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
+++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc
@@ -34,13 +34,13 @@ namespace gpu {
 
 Status BufferAllocations::TearDown(
     const std::set<se::DeviceMemoryBase>& live_addresses,
-    const BufferAssignment* buffer_assignment) {
+    absl::Span<const BufferAllocation> allocations) {
   // Deallocate temporary buffers, taking care to try to deallocate all of them
   // even if one of the deallocations fails.
   Status status;
-  const int64 num_buffers = buffer_assignment->Allocations().size();
+  const int64 num_buffers = allocations.size();
   for (BufferAllocation::Index i = 0; i < num_buffers; ++i) {
-    const BufferAllocation& allocation = buffer_assignment->GetAllocation(i);
+    const BufferAllocation& allocation = allocations[i];
     se::DeviceMemoryBase buffer_address = GetDeviceAddress(allocation.index());
     // Deallocate buffers marked "maybe_live_out" but aren't actually live out,
     // and temp buffers.
diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h
index 0d534b0d286..d5fa8c551d7 100644
--- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.h
+++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.h
@@ -70,7 +70,7 @@ class BufferAllocations {
   // Tears down all buffers allocated by this object that are not in
   // `live_addresses`.
   Status TearDown(const std::set<se::DeviceMemoryBase>& live_addresses,
-                  const BufferAssignment* buffer_assignment);
+                  absl::Span<const BufferAllocation> allocations);
 
   std::string ToString() {
     std::string out;
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 8a88a31d02a..8084e0eb71d 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -869,13 +869,18 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
       absl::flat_hash_map<ShapeIndex, GpuExecutable::OutputInfo>;
   TF_ASSIGN_OR_RETURN(OutputInfoMap output_info,
                       GetOutputInfo(*module, *buffer_assignment));
+  auto buffer_assignment_proto =
+      std::make_unique<BufferAssignmentProto>(buffer_assignment->ToProto());
+  std::vector<BufferAllocation> allocations =
+      buffer_assignment->ReleaseAllocations();
   GpuVersion gpu_version = GetGpuVersion(stream_exec);
   auto* gpu_executable = new GpuExecutable(
       {std::move(backend_result.first), std::move(backend_result.second),
        gpu_version, std::move(thunk_schedule), std::move(constants),
-       std::move(output_info), std::move(module), std::move(buffer_assignment),
-       std::move(profile_printer), std::move(profile_index_map)});
+       std::move(output_info), std::move(module), std::move(allocations),
+       std::move(buffer_assignment_proto), std::move(profile_printer),
+       std::move(profile_index_map)});
   if (embed_ir_in_executable) {
     DCHECK_NE("", ir_module_string_before_opt);
     gpu_executable->set_ir_module_string(ir_module_string_before_opt);
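One subtlety in the gpu_compiler.cc hunk above: the proto snapshot has to be taken before `ReleaseAllocations()`, since releasing moves the allocation vector out of the `BufferAssignment`, presumably leaving nothing for `ToProto()` to serialize. A sketch of the ordering constraint under hypothetical stand-in types:

```cpp
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

// Simplified stand-ins; the real types are BufferAssignment,
// BufferAssignmentProto, and BufferAllocation.
struct Allocation { long size; };
struct AssignmentProto { std::size_t num_allocations; };

class Assignment {
 public:
  explicit Assignment(std::vector<Allocation> a) : allocations_(std::move(a)) {}
  AssignmentProto ToProto() const { return {allocations_.size()}; }
  std::vector<Allocation> ReleaseAllocations() { return std::move(allocations_); }

 private:
  std::vector<Allocation> allocations_;
};

int main() {
  Assignment assignment({{16}, {32}});
  // Mirror the order in GpuCompiler::RunBackend: snapshot the proto while the
  // allocations are still inside the assignment...
  auto proto = std::make_unique<AssignmentProto>(assignment.ToProto());
  // ...then move the allocations out for the executable to own. Reversing
  // these two steps would serialize an already-emptied assignment.
  std::vector<Allocation> allocations = assignment.ReleaseAllocations();
  return (proto->num_allocations == 2 && allocations.size() == 2) ? 0 : 1;
}
```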
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.cc
index 51888c0b8c7..9851ce0b8af 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.cc
@@ -22,7 +22,7 @@ namespace gpu {
 
 void GpuDebugInfoManager::RegisterModule(
     const ModuleIdentifier& module_id, std::shared_ptr<HloModule> hlo_module,
-    std::shared_ptr<const BufferAssignment> buffer_assignment) {
+    std::shared_ptr<const BufferAssignmentProto> buffer_assignment) {
   tensorflow::mutex_lock lock(mutex_);
   if (active_modules_.find(module_id) != active_modules_.end()) {
     active_modules_[module_id].instances.emplace_back(hlo_module,
@@ -40,7 +40,7 @@ void GpuDebugInfoManager::RegisterModule(
 // However during tracing, we will defer the cleanup after serialization.
 void GpuDebugInfoManager::UnregisterModule(
     const ModuleIdentifier& module_id, std::shared_ptr<HloModule> hlo_module,
-    std::shared_ptr<const BufferAssignment> buffer_assignment) {
+    std::shared_ptr<const BufferAssignmentProto> buffer_assignment) {
   tensorflow::mutex_lock lock(mutex_);
   CHECK(active_modules_.find(module_id) != active_modules_.end());
   GpuModuleEntry& active_module = active_modules_[module_id];
@@ -146,8 +146,10 @@ void GpuDebugInfoManager::StopTracing(
       // non-nullptr. Due to the inconvenience of creation of buffer_assignment
       // object in test, we set it to nullptr and guard this for it.
       if (m.instances[0].hlo_module && m.instances[0].buffer_assignment) {
-        info.hlo_proto = absl::make_unique<HloProto>(MakeHloProto(
-            *m.instances[0].hlo_module, *m.instances[0].buffer_assignment));
+        info.hlo_proto = absl::make_unique<HloProto>(
+            MakeHloProto(*m.instances[0].hlo_module));
+        *info.hlo_proto->mutable_buffer_assignment() =
+            *m.instances[0].buffer_assignment;
       }
       module_debug_info->emplace_back(std::move(info));
     }
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h
index 0a8b444243e..36d4435d284 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h
@@ -17,7 +17,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_DEBUG_INFO_MANAGER_H_
 
 #include "absl/container/flat_hash_map.h"
-#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/core/lib/core/status.h"
 
@@ -56,14 +56,14 @@ class GpuDebugInfoManager {
   // Modules with same module id can be registered and tracked separately.
   void RegisterModule(
       const ModuleIdentifier& module_id, std::shared_ptr<HloModule> hlo_module,
-      std::shared_ptr<const BufferAssignment> buffer_assignment);
+      std::shared_ptr<const BufferAssignmentProto> buffer_assignment);
 
   // Unregister an active module. When the last active module of the same
   // module id is out of scope, we remove it from our database.
   // However during tracing, we will defer the cleanup after serialization.
   void UnregisterModule(
       const ModuleIdentifier& module_id, std::shared_ptr<HloModule> hlo_module,
-      std::shared_ptr<const BufferAssignment> buffer_assignment);
+      std::shared_ptr<const BufferAssignmentProto> buffer_assignment);
 
   // Register when the module start execution on certain device.
   // TODO(jiesun): Do we need to track which device this is?
@@ -110,10 +110,10 @@ class GpuDebugInfoManager {
   // tracking, they need to be tracked separately.
   struct GpuModuleInstance {
     GpuModuleInstance(std::shared_ptr<HloModule> m,
-                      std::shared_ptr<const BufferAssignment> b)
+                      std::shared_ptr<const BufferAssignmentProto> b)
         : hlo_module(std::move(m)), buffer_assignment(std::move(b)) {}
     std::shared_ptr<HloModule> hlo_module;
-    std::shared_ptr<const BufferAssignment> buffer_assignment;
+    std::shared_ptr<const BufferAssignmentProto> buffer_assignment;
     bool active = true;
   };
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager_test.cc
index 5ea26c55823..e0d42a3d692 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager_test.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/compiler/xla/service/gpu/gpu_debug_info_manager.h"
 
-#include "tensorflow/compiler/xla/service/buffer_assignment.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 
 namespace xla {
@@ -30,7 +30,7 @@ class GpuDebugInfoManagerTest : public HloTestBase {
     int unique_id;
     string id;
     std::shared_ptr<HloModule> module;
-    std::shared_ptr<const BufferAssignment> buffer_assignment;
+    std::shared_ptr<const BufferAssignmentProto> buffer_assignment;
   };
 
   // Return unique id of this module.
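The StopTracing change trades re-serializing a live BufferAssignment for copying the proto snapshot taken at compile time. A property worth noting is that protobuf message assignment deep-copies, so the shared snapshot stays immutable no matter how many HloProtos it is spliced into. A sketch with hand-written stand-ins for the generated message classes (illustration only; the real ones come from hlo.pb.h):

```cpp
#include <memory>
#include <string>

// Hand-written stand-ins for the generated message classes.
struct BufferAssignmentProto { std::string blob; };
struct HloProto {
  BufferAssignmentProto buffer_assignment;
  BufferAssignmentProto* mutable_buffer_assignment() {
    return &buffer_assignment;
  }
};

int main() {
  // Many executables can share one immutable compile-time snapshot.
  auto snapshot = std::make_shared<const BufferAssignmentProto>(
      BufferAssignmentProto{"serialized at compile time"});

  // StopTracing-style splice: message assignment deep-copies, so the shared
  // snapshot is never mutated.
  auto hlo_proto = std::make_unique<HloProto>();
  *hlo_proto->mutable_buffer_assignment() = *snapshot;
  return hlo_proto->buffer_assignment.blob == snapshot->blob ? 0 : 1;
}
```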
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 336665a8058..51cd70e8bbb 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -62,18 +62,19 @@ GpuExecutable::GpuExecutable(GpuExecutable::Params params)
       binary_(std::move(params.binary)),
       gpu_version_(params.gpu_version),
       thunk_schedule_(std::move(params.thunk_schedule)),
-      assignment_(std::move(params.assignment)),
+      allocations_(std::move(params.allocations)),
+      debug_buffer_assignment_(std::move(params.debug_buffer_assignment)),
       constants_(std::move(params.constants)),
       output_info_(std::move(params.output_info)) {
-  CHECK(has_module() && assignment_);
+  CHECK(has_module());
   GpuDebugInfoManager::Get()->RegisterModule(module().name(), shared_module(),
-                                             assignment_);
+                                             debug_buffer_assignment_);
 }
 
 GpuExecutable::~GpuExecutable() {
-  CHECK(has_module() && assignment_);
+  CHECK(has_module());
   GpuDebugInfoManager::Get()->UnregisterModule(module().name(), shared_module(),
-                                               assignment_);
+                                               debug_buffer_assignment_);
 
   {
     // We could have issued host->device mem copies in ResolveConstantGlobals.
@@ -376,11 +377,11 @@ StatusOr<BufferAllocations> GpuExecutable::GenerateBufferAllocations(
       [&] { return std::string("Build buffer allocations"); },
       tensorflow::profiler::TraceMeLevel::kInfo);
 
-  const int64 num_buffers = assignment_->Allocations().size();
+  const int64 num_buffers = allocations_.size();
   std::vector<se::DeviceMemoryBase> buffers;
   buffers.reserve(num_buffers);
   for (int64 i = 0; i < num_buffers; ++i) {
-    const BufferAllocation& allocation = assignment_->GetAllocation(i);
+    const BufferAllocation& allocation = allocations_[i];
     TF_ASSIGN_OR_RETURN(
         se::DeviceMemoryBase buffer,
         BufferForAllocation(arguments, globals, allocation, memory_allocator,
@@ -537,7 +538,7 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
 
   // Free all temporary allocations.
   TF_RETURN_IF_ERROR(
-      buffer_allocations.TearDown(buffers_in_result, assignment_.get()));
+      buffer_allocations.TearDown(buffers_in_result, allocations_));
 
   // Free allocations for arguments.
   MarkToBeReleasedArguments(absl::MakeSpan(arguments), result);
@@ -551,9 +552,8 @@ int64 GpuExecutable::SizeOfGeneratedCodeInBytes() const {
     return -1;
   }
   int64 size = binary().size();
-  for (BufferAllocation::Index i = 0; i < assignment_->Allocations().size();
-       ++i) {
-    const BufferAllocation& allocation = assignment_->GetAllocation(i);
+  for (BufferAllocation::Index i = 0; i < allocations_.size(); ++i) {
+    const BufferAllocation& allocation = allocations_[i];
     if (allocation.is_constant()) {
       size += allocation.size();
     }
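The call `buffer_allocations.TearDown(buffers_in_result, allocations_)` compiles without an explicit conversion because `absl::Span<const T>` binds implicitly to a `const std::vector<T>`. A small sketch; `Allocation` and `TotalBytes` are hypothetical:

```cpp
#include <vector>

#include "absl/types/span.h"

// Illustrative stand-in for BufferAllocation.
struct Allocation { long size; };

// A callee that only iterates can take a span instead of the owning
// BufferAssignment; absl::Span<const T> binds to any contiguous storage.
long TotalBytes(absl::Span<const Allocation> allocations) {
  long total = 0;
  for (const Allocation& allocation : allocations) total += allocation.size;
  return total;
}

int main() {
  const std::vector<Allocation> allocations = {{16}, {32}, {64}};
  return TotalBytes(allocations) == 112 ? 0 : 1;  // implicit vector -> span
}
```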
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
index 690fb6aa94d..e977248872a 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h
@@ -71,7 +71,8 @@ class GpuExecutable : public Executable {
     std::vector<ConstantInfo> constants;
     absl::flat_hash_map<ShapeIndex, OutputInfo> output_info;
     std::unique_ptr<HloModule> hlo_module;
-    std::unique_ptr<const BufferAssignment> assignment;
+    std::vector<BufferAllocation> allocations;
+    std::unique_ptr<BufferAssignmentProto> debug_buffer_assignment;
     std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data = nullptr;
     std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map = nullptr;
   };
@@ -108,8 +109,8 @@ class GpuExecutable : public Executable {
       std::vector<ExecutionInput> arguments,
       HloExecutionProfile* hlo_execution_profile) override;
 
-  std::shared_ptr<const BufferAssignment> GetBufferAssignment() const {
-    return assignment_;
+  absl::Span<const BufferAllocation> GetAllocations() const {
+    return allocations_;
   }
 
  private:
@@ -176,7 +177,9 @@ class GpuExecutable : public Executable {
 
   // Owns the buffer data at runtime. It provides information to allocate
   // memory for every output/temp buffers.
-  const std::shared_ptr<const BufferAssignment> assignment_;
+  const std::vector<BufferAllocation> allocations_;
+
+  std::shared_ptr<const BufferAssignmentProto> debug_buffer_assignment_;
 
   // Cache of module handles and constant buffer allocation maps used by
   // `ResolveConstantGlobals`.
diff --git a/tensorflow/compiler/xla/service/gpu/tests/gemm_rewrite_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gemm_rewrite_test.cc
index bc832b4717a..9581673ffdb 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/gemm_rewrite_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/tests/gemm_rewrite_test.cc
@@ -46,12 +46,9 @@ class GemmRewriteTest : public GpuCodegenTest {
                             backend().default_stream_executor()->GetAllocator()));
     GpuExecutable* gpu_executable =
         static_cast<GpuExecutable*>(executable.get());
-    std::shared_ptr<const BufferAssignment> buffer_assignment =
-        gpu_executable->GetBufferAssignment();
-    CHECK_EQ(buffer_assignment->Allocations().size(),
-             expected_number_of_allocations)
-        << "Unexpected buffer assignment. Was:\n"
-        << buffer_assignment->ToString();
+    absl::Span<const BufferAllocation> allocations =
+        gpu_executable->GetAllocations();
+    CHECK_EQ(allocations.size(), expected_number_of_allocations);
   }
 };
 
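One caveat the new `GetAllocations()` accessor introduces for callers like the test above: the returned span aliases the vector owned by the executable, whereas the old `shared_ptr<const BufferAssignment>` kept the assignment alive on its own. A sketch with a hypothetical stand-in executable:

```cpp
#include <vector>

#include "absl/types/span.h"

// Illustrative stand-ins only.
struct Allocation { long size; };

class FakeExecutable {
 public:
  absl::Span<const Allocation> GetAllocations() const { return allocations_; }

 private:
  const std::vector<Allocation> allocations_ = {{16}, {32}};
};

int main() {
  FakeExecutable executable;
  // Fine while `executable` is alive; the span aliases its vector.
  absl::Span<const Allocation> allocations = executable.GetAllocations();
  long total = 0;
  for (const Allocation& a : allocations) total += a.size;
  // Using `allocations` after `executable` is destroyed would be a
  // use-after-free; copy into a std::vector if it must outlive the executable.
  return total == 48 ? 0 : 1;
}
```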
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc
index 8648ba3c684..908355fac27 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc
+++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc
@@ -589,13 +589,14 @@ StatusOr<std::unique_ptr<Executable>> MlirCompilerImpl::RunBackend(
 
   TF_ASSIGN_OR_RETURN(auto output_info,
                       xla::gpu::GetOutputInfo(*module, *buffer_assignment));
+  std::vector<BufferAllocation> allocations =
+      buffer_assignment->ReleaseAllocations();
   // TODO(b/137624192): Add profiling support.
   return {absl::make_unique<GpuExecutable>(GpuExecutable::Params{
       std::move(ptx), std::move(cubin), GetGpuVersion(stream_exec),
       std::move(thunk_schedule), std::vector<GpuExecutable::ConstantInfo>(),
-      std::move(output_info), std::move(module),
-      std::move(buffer_assignment)})};
+      std::move(output_info), std::move(module), std::move(allocations)})};
 }
 
 StatusOr<std::vector<std::unique_ptr<Executable>>> MlirCompilerImpl::Compile(
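The MLIR path can omit the trailing Params fields because `GpuExecutable::Params` is an aggregate: members without an initializer-clause are value-initialized, so `debug_buffer_assignment` and the profiling fields default to null. A sketch with a simplified stand-in Params (not the real struct):

```cpp
#include <memory>
#include <vector>

// Simplified stand-in for GpuExecutable::Params, illustration only.
struct Params {
  std::vector<int> allocations;
  std::unique_ptr<int> debug_buffer_assignment;
  std::unique_ptr<int> hlo_profile_printer_data = nullptr;
};

int main() {
  // Aggregate initialization: members without an initializer-clause are
  // value-initialized, so both smart pointers end up null.
  Params params{std::vector<int>{1, 2, 3}};
  return (params.debug_buffer_assignment == nullptr &&
          params.hlo_profile_printer_data == nullptr)
             ? 0
             : 1;
}
```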