Finish migrating {Tf|Platform}GpuId to {Tf|Platform}DeviceId

PiperOrigin-RevId: 361252995 Change-Id: I818798fc00efe7b98c35145ce067204d9e023895
2021-03-05 16:56:23 -08:00 · 2021-03-05 16:56:23 -08:00 · d7634bbfaf
commit d7634bbfaf
parent fbd744dfdb
28 changed files with 398 additions and 379 deletions
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -43,15 +43,15 @@ static xla::StatusOr<absl::optional<std::set<int>>> ParseVisibleDeviceList(
  }
  const std::vector<string> visible_devices =
      absl::StrSplit(visible_device_list, ',');
-  for (const string& platform_gpu_id_str : visible_devices) {
-    int32 platform_gpu_id;
-    if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
+  for (const string& platform_device_id_str : visible_devices) {
+    int32 platform_device_id;
+    if (!absl::SimpleAtoi(platform_device_id_str, &platform_device_id)) {
      return errors::InvalidArgument(
          "Could not parse entry in 'visible_device_list': '",
-          platform_gpu_id_str,
+          platform_device_id_str,
          "'. visible_device_list = ", visible_device_list);
    }
-    gpu_ids.insert(platform_gpu_id);
+    gpu_ids.insert(platform_device_id);
  }
  return {{gpu_ids}};
 }
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
@@ -102,19 +102,21 @@ struct EdgePtrCompare {
 // TODO(laigd): instead of deciding the device here, the converter should accept
 // a device name as one of the conversion parameter so users can control on
 // which device they want to run the conversion.
-std::pair<TfGpuId, PlatformGpuId> GetFirstValidDeviceId() {
-  for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
-    TfGpuId tf_gpu_id(tf_gpu_id_value);
-    PlatformGpuId platform_gpu_id;
-    Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
+std::pair<TfDeviceId, PlatformDeviceId> GetFirstValidDeviceId() {
+  for (int tf_device_id_value = 0; tf_device_id_value < 100;
+       ++tf_device_id_value) {
+    TfDeviceId tf_device_id(tf_device_id_value);
+    PlatformDeviceId platform_device_id;
+    Status s =
+        GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id);
    if (s.ok()) {
-      VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
-              << platform_gpu_id.value();
-      return std::make_pair(tf_gpu_id, platform_gpu_id);
+      VLOG(1) << "Found TF GPU " << tf_device_id.value() << " at cuda device "
+              << platform_device_id.value();
+      return std::make_pair(tf_device_id, platform_device_id);
    }
  }
  LOG(ERROR) << "Could not find any TF GPUs";
-  return std::make_pair(TfGpuId(-1), PlatformGpuId(-1));
+  return std::make_pair(TfDeviceId(-1), PlatformDeviceId(-1));
 }

 // Returns false for const nodes (we intend to drop control edges from those).
@@ -266,14 +268,14 @@ Status GetEngineInfo(const Graph* g,
    }
    info->device = DeviceNameUtils::ParsedNameToString(segment_device);
  } else {
-    TfGpuId tf_gpu_id;
-    PlatformGpuId platform_gpu_id;
-    std::tie(tf_gpu_id, platform_gpu_id) = GetFirstValidDeviceId();
-    if (tf_gpu_id.value() >= 0) {
+    TfDeviceId tf_device_id;
+    PlatformDeviceId platform_device_id;
+    std::tie(tf_device_id, platform_device_id) = GetFirstValidDeviceId();
+    if (tf_device_id.value() >= 0) {
      DeviceNameUtils::ParsedName parsed_name;
      parsed_name.type = "GPU";
      parsed_name.has_type = true;
-      parsed_name.id = tf_gpu_id.value();
+      parsed_name.id = tf_device_id.value();
      parsed_name.has_id = true;
      info->device = DeviceNameUtils::ParsedNameToString(parsed_name);
    } else {
@@ -640,17 +642,17 @@ std::pair<int, Allocator*> GetDeviceAndAllocator(const ConversionParams& params,
  if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
      engine.device.empty()) {
    // If device is not set, use the first found GPU device for the conversion.
-    TfGpuId tf_gpu_id;
-    PlatformGpuId platform_gpu_id;
-    std::tie(tf_gpu_id, platform_gpu_id) = GetFirstValidDeviceId();
-    cuda_device_id = platform_gpu_id.value();
+    TfDeviceId tf_device_id;
+    PlatformDeviceId platform_device_id;
+    std::tie(tf_device_id, platform_device_id) = GetFirstValidDeviceId();
+    cuda_device_id = platform_device_id.value();
    if (cuda_device_id >= 0) {
      GPUOptions gpu_options;
      // If the TF to Cuda gpu id mapping exist, the device and corresponding
      // allocator must have been initialized already, so the
      // GetGPUAllocator() call won't create a new allocator.
      dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
-          gpu_options, tf_gpu_id, /*total_bytes=*/1, /*peer_gpu_ids=*/{});
+          gpu_options, tf_device_id, /*total_bytes=*/1, /*peer_gpu_ids=*/{});
    }
    return std::make_pair(cuda_device_id, dev_allocator);
  }
--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc
@@ -1044,25 +1044,25 @@ Status TRTEngineOp::AllocateCalibrationResources(
  }
  cres->calibrator_.reset(
      new TRTInt8Calibrator(cres->device_buffers_, batch_size, name()));
-  const int platform_gpu_id =
+  const int platform_device_id =
      ctx->device()->tensorflow_gpu_device_info()->gpu_id;
-  if (platform_gpu_id < 0) {
+  if (platform_device_id < 0) {
    LOG(ERROR) << "Can't get gpu_device_info from context->device()";
    return errors::InvalidArgument(
        "Context->device doesn't contain device info!");
  }

  cache_res->Ref();
-  cres->thr_.reset(new std::thread([this, cres, shapes, platform_gpu_id,
+  cres->thr_.reset(new std::thread([this, cres, shapes, platform_device_id,
                                    cache_res]() {
    core::ScopedUnref sc(cache_res);

-    VLOG(1) << "Starting calibration thread on device " << platform_gpu_id
+    VLOG(1) << "Starting calibration thread on device " << platform_device_id
            << ", Calibration Resource @ " << cres;
-    auto err = cudaSetDevice(platform_gpu_id);
+    auto err = cudaSetDevice(platform_device_id);
    if (err != cudaSuccess) {
      // TODO(aaroey): should return error here.
-      LOG(ERROR) << "Couldn't set cuda device to " << platform_gpu_id
+      LOG(ERROR) << "Couldn't set cuda device to " << platform_device_id
                 << " in calibration thread";
    }
    std::vector<PartialTensorShape> partial_shapes(shapes.begin(),
--- a/tensorflow/core/common_runtime/device/device_event_mgr_test.cc
+++ b/tensorflow/core/common_runtime/device/device_event_mgr_test.cc
@@ -149,7 +149,7 @@ class GPUDeviceTestHelper {
        DeviceFactory::NewDevice(DEVICE_GPU, sops, "/job:a/replica:0/task:0");
    gpu_.reset(reinterpret_cast<BaseGPUDevice*>(device_.release()));
    gpu_allocator_ = GPUProcessState::singleton()->GetGPUAllocator(
-        GPUOptions(), TfGpuId(0), memory_limit, /*peer_gpu_ids=*/{});
+        GPUOptions(), TfDeviceId(0), memory_limit, /*peer_gpu_ids=*/{});
    host_allocator_ = GPUProcessState::singleton()->GetGpuHostAllocator(0);
  }

--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc
@@ -26,11 +26,11 @@ limitations under the License.

 namespace tensorflow {

-GPUcudaMallocAllocator::GPUcudaMallocAllocator(Allocator* allocator,
-                                               PlatformGpuId platform_gpu_id)
+GPUcudaMallocAllocator::GPUcudaMallocAllocator(
+    Allocator* allocator, PlatformDeviceId platform_device_id)
    : base_allocator_(allocator) {
  stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
-                                                           platform_gpu_id)
+                                                           platform_device_id)
                     .ValueOrDie();
 }

--- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h
@@ -32,7 +32,7 @@ namespace tensorflow {
 class GPUcudaMallocAllocator : public Allocator {
 public:
  explicit GPUcudaMallocAllocator(Allocator* allocator,
-                                  PlatformGpuId platform_gpu_id);
+                                  PlatformDeviceId platform_device_id);
  ~GPUcudaMallocAllocator() override;
  string Name() override { return "gpu_debug"; }
  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.cc
@@ -42,12 +42,12 @@ static std::string GetCudaErrorMessage(CUresult result) {
 #endif  // GOOGLE_CUDA

 GpuCudaMallocAsyncAllocator::GpuCudaMallocAsyncAllocator(
-    PlatformGpuId platform_gpu_id, size_t pool_size, bool reserve_memory,
+    PlatformDeviceId platform_device_id, size_t pool_size, bool reserve_memory,
    bool compute_stats)
-    : name_(absl::StrCat("gpu_async_", platform_gpu_id.value())) {
+    : name_(absl::StrCat("gpu_async_", platform_device_id.value())) {
 #if TF_CUDA_MALLOC_ASYNC_SUPPORTED
  stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
-                                                           platform_gpu_id)
+                                                           platform_device_id)
                     .ValueOrDie();
  // Initialized here as it only exist if compiled with a recent
  // enough CUDA.
@@ -56,7 +56,7 @@ GpuCudaMallocAsyncAllocator::GpuCudaMallocAsyncAllocator(
  // WAR an CUDA 11.2 driver bug for multiple-GPU. It currently
  // request that the context on GPU 0 is initialized. Which isn't the
  // case for TF+horovod.
-  if (platform_gpu_id.value() > 0) {
+  if (platform_device_id.value() > 0) {
    CUcontext pctx;  // We loose track of it. But this is fine.
    if (auto result = cuDevicePrimaryCtxRetain(&pctx, 0))
      LOG(FATAL)  // Crash OK.
@@ -65,9 +65,10 @@ GpuCudaMallocAsyncAllocator::GpuCudaMallocAsyncAllocator(

  se::cuda::ScopedActivateExecutorContext scoped_activation{stream_exec_};
  int cuda_malloc_async_supported;
-  if (auto status = cuDeviceGetAttribute(
-          &cuda_malloc_async_supported,
-          CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, platform_gpu_id.value()))
+  if (auto status =
+          cuDeviceGetAttribute(&cuda_malloc_async_supported,
+                               CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED,
+                               platform_device_id.value()))
    LOG(FATAL) <<  // Crash OK.
        "Failed to get device attribute: " << GetCudaErrorMessage(status);
  if (!cuda_malloc_async_supported)
@@ -79,12 +80,13 @@ GpuCudaMallocAsyncAllocator::GpuCudaMallocAsyncAllocator(
    LOG(FATAL)  // Crash OK.
        << "Failed to create CUDA stream: " << GetCudaErrorMessage(status);

-  if (auto status = cuDeviceGetDefaultMemPool(&pool_, platform_gpu_id.value()))
+  if (auto status =
+          cuDeviceGetDefaultMemPool(&pool_, platform_device_id.value()))
    LOG(FATAL) <<  // Crash OK.
        "Failed to get default CUDA pool: " << GetCudaErrorMessage(status);

  VLOG(1) << Name() << " CudaMallocAsync initialized on platform: "
-          << platform_gpu_id.value() << " with pool size of: " << pool_size
+          << platform_device_id.value() << " with pool size of: " << pool_size
          << " this ptr: " << this;
  uint64_t pool_size_64 = pool_size;
  if (auto status = cuMemPoolSetAttribute(
--- a/tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_cudamallocasync_allocator.h
@@ -64,7 +64,7 @@ namespace tensorflow {
 // driver can return the excess memory to other processes.
 class GpuCudaMallocAsyncAllocator : public Allocator {
 public:
-  explicit GpuCudaMallocAsyncAllocator(PlatformGpuId platform_gpu_id,
+  explicit GpuCudaMallocAsyncAllocator(PlatformDeviceId platform_device_id,
                                       size_t pool_size,
                                       bool reserve_memory = false,
                                       bool compute_stats = false);
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc
@@ -76,10 +76,10 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
 // GPUDebugAllocator
 // -----------------------------------------------------------------------------
 GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator,
-                                     PlatformGpuId platform_gpu_id)
+                                     PlatformDeviceId platform_device_id)
    : base_allocator_(allocator) {
  stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
-                                                           platform_gpu_id)
+                                                           platform_device_id)
                     .ValueOrDie();
 }

@@ -155,10 +155,10 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) {
 // GPUNanResetAllocator
 // -----------------------------------------------------------------------------
 GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator,
-                                           PlatformGpuId platform_gpu_id)
+                                           PlatformDeviceId platform_device_id)
    : base_allocator_(allocator) {
  stream_exec_ = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
-                                                           platform_gpu_id)
+                                                           platform_device_id)
                     .ValueOrDie();
 }

--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h
@@ -34,7 +34,7 @@ namespace tensorflow {
 class GPUDebugAllocator : public Allocator {
 public:
  explicit GPUDebugAllocator(Allocator* allocator,
-                             PlatformGpuId platform_gpu_id);
+                             PlatformDeviceId platform_device_id);
  ~GPUDebugAllocator() override;
  string Name() override { return "gpu_debug"; }
  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
@@ -64,7 +64,7 @@ class GPUDebugAllocator : public Allocator {
 class GPUNanResetAllocator : public Allocator {
 public:
  explicit GPUNanResetAllocator(Allocator* allocator,
-                                PlatformGpuId platform_gpu_id);
+                                PlatformDeviceId platform_device_id);
  ~GPUNanResetAllocator() override;
  string Name() override { return "gpu_nan_reset"; }
  void* AllocateRaw(size_t alignment, size_t num_bytes) override;
--- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc
@@ -37,7 +37,7 @@ limitations under the License.
 namespace tensorflow {
 namespace {

-se::StreamExecutor* ExecutorForPlatformGpuId(
+se::StreamExecutor* ExecutorForPlatformDeviceId(
    PlatformDeviceId platform_device_id) {
  return DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
                                                   platform_device_id)
@@ -45,12 +45,12 @@ se::StreamExecutor* ExecutorForPlatformGpuId(
 }

 TEST(GPUDebugAllocatorTest, OverwriteDetection_None) {
-  const PlatformGpuId platform_gpu_id(0);
-  auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id);
+  const PlatformDeviceId platform_device_id(0);
+  auto stream_exec = ExecutorForPlatformDeviceId(platform_device_id);
  DeviceMemAllocator* sub_allocator = new DeviceMemAllocator(
-      stream_exec, platform_gpu_id, false /*use_unified_memory*/, {}, {});
+      stream_exec, platform_device_id, false /*use_unified_memory*/, {}, {});
  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                      platform_gpu_id);
+                      platform_device_id);

  for (int s : {8}) {
    std::vector<int64> cpu_array(s);
@@ -72,13 +72,13 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) {
  for (int s : {8, 211}) {
    EXPECT_DEATH(
        {
-          const PlatformGpuId platform_gpu_id(0);
-          auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id);
+          const PlatformDeviceId platform_device_id(0);
+          auto stream_exec = ExecutorForPlatformDeviceId(platform_device_id);
          DeviceMemAllocator* sub_allocator =
-              new DeviceMemAllocator(stream_exec, platform_gpu_id,
+              new DeviceMemAllocator(stream_exec, platform_device_id,
                                     false /*use_unified_memory*/, {}, {});
          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                              platform_gpu_id);
+                              platform_device_id);

          std::vector<int64> cpu_array(s);
          memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -108,13 +108,13 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
  for (int s : {8, 22}) {
    EXPECT_DEATH(
        {
-          const PlatformGpuId platform_gpu_id(0);
-          auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id);
+          const PlatformDeviceId platform_device_id(0);
+          auto stream_exec = ExecutorForPlatformDeviceId(platform_device_id);
          DeviceMemAllocator* sub_allocator =
-              new DeviceMemAllocator(stream_exec, platform_gpu_id,
+              new DeviceMemAllocator(stream_exec, platform_device_id,
                                     false /*use_unified_memory*/, {}, {});
          GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                              platform_gpu_id);
+                              platform_device_id);

          std::vector<int64> cpu_array(s);
          memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64));
@@ -141,12 +141,12 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) {
 }

 TEST(GPUDebugAllocatorTest, ResetToNan) {
-  const PlatformGpuId platform_gpu_id(0);
-  auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id);
+  const PlatformDeviceId platform_device_id(0);
+  auto stream_exec = ExecutorForPlatformDeviceId(platform_device_id);
  DeviceMemAllocator* sub_allocator = new DeviceMemAllocator(
-      stream_exec, platform_gpu_id, false /*use_unified_memory*/, {}, {});
+      stream_exec, platform_device_id, false /*use_unified_memory*/, {}, {});
  GPUNanResetAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                         platform_gpu_id);
+                         platform_device_id);

  std::vector<float> cpu_array(1024);
  std::vector<float> cpu_array_result(1024);
@@ -183,15 +183,15 @@ TEST(GPUDebugAllocatorTest, ResetToNan) {
 }

 TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
-  const PlatformGpuId platform_gpu_id(0);
-  auto stream_exec = ExecutorForPlatformGpuId(platform_gpu_id);
+  const PlatformDeviceId platform_device_id(0);
+  auto stream_exec = ExecutorForPlatformDeviceId(platform_device_id);
  // NaN reset must be the outer-most allocator.
  DeviceMemAllocator* sub_allocator = new DeviceMemAllocator(
-      stream_exec, platform_gpu_id, false /*use_unified_memory*/, {}, {});
+      stream_exec, platform_device_id, false /*use_unified_memory*/, {}, {});
  GPUNanResetAllocator a(
      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                            platform_gpu_id),
-      platform_gpu_id);
+                            platform_device_id),
+      platform_device_id);

  std::vector<float> cpu_array(1024);
  std::vector<float> cpu_array_result(1024);
@@ -228,24 +228,24 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) {
 }

 TEST(GPUDebugAllocatorTest, TracksSizes) {
-  const PlatformGpuId platform_gpu_id(0);
+  const PlatformDeviceId platform_device_id(0);
  DeviceMemAllocator* sub_allocator = new DeviceMemAllocator(
-      ExecutorForPlatformGpuId(platform_gpu_id), platform_gpu_id,
+      ExecutorForPlatformDeviceId(platform_device_id), platform_device_id,
      false /*use_unified_memory*/, {}, {});
  GPUDebugAllocator a(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                      platform_gpu_id);
+                      platform_device_id);
  EXPECT_EQ(true, a.TracksAllocationSizes());
 }

 TEST(GPUDebugAllocatorTest, AllocatedVsRequested) {
-  const PlatformGpuId platform_gpu_id(0);
+  const PlatformDeviceId platform_device_id(0);
  DeviceMemAllocator* sub_allocator = new DeviceMemAllocator(
-      ExecutorForPlatformGpuId(platform_gpu_id), platform_gpu_id,
+      ExecutorForPlatformDeviceId(platform_device_id), platform_device_id,
      false /*use_unified_memory*/, {}, {});
  GPUNanResetAllocator a(
      new GPUDebugAllocator(new GPUBFCAllocator(sub_allocator, 1 << 30, ""),
-                            platform_gpu_id),
-      platform_gpu_id);
+                            platform_device_id),
+      platform_device_id);
  float* t1 = TypedAllocator::Allocate<float>(&a, 1, {});
  EXPECT_EQ(4, a.RequestedSize(t1));
  EXPECT_EQ(256, a.AllocatedSize(t1));
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -120,7 +120,7 @@ class EigenGpuStreamDevice : public ::Eigen::StreamInterface {
  }
  ~EigenGpuStreamDevice() override {}
  void Reinitialize(OpKernelContext* context, const gpuStream_t* gpu_stream,
-                    TfGpuId tf_gpu_id, ::tensorflow::Allocator* alloc,
+                    TfDeviceId tf_device_id, ::tensorflow::Allocator* alloc,
                    char* scratch) {
    if (LogMemory::IsEnabled()) {
      operation_ = context->op_kernel().name() + "/EigenAllocator";
@@ -132,9 +132,10 @@ class EigenGpuStreamDevice : public ::Eigen::StreamInterface {
        reinterpret_cast<unsigned int*>(scratch + Eigen::kGpuScratchSize);
    stream_ = gpu_stream;
    allocator_ = alloc;
-    PlatformGpuId platform_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
-    device_prop_ = &Eigen::m_deviceProperties[platform_gpu_id.value()];
+    PlatformDeviceId platform_device_id;
+    TF_CHECK_OK(
+        GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id));
+    device_prop_ = &Eigen::m_deviceProperties[platform_device_id.value()];
  }

  const gpuStream_t& stream() const override { return *stream_; }
@@ -233,18 +234,18 @@ class EigenGpuStreamDevice : public ::Eigen::StreamInterface {
 class BaseGPUDevice::StreamGroupFactory {
 public:
  // Returns the unique stream group for use with the stream defined by
-  // {tf_gpu_id, stream_group_within_gpu}, creating it if it does not yet
+  // {tf_device_id, stream_group_within_gpu}, creating it if it does not yet
  // exist.
  // This function is thread safe.
-  BaseGPUDevice::StreamGroup* GetOrCreate(TfGpuId tf_gpu_id,
+  BaseGPUDevice::StreamGroup* GetOrCreate(TfDeviceId tf_device_id,
                                          int stream_group_within_gpu,
                                          se::StreamExecutor* executor,
                                          const GPUOptions& options) {
    mutex_lock guard(lock_);
    StreamGroup* group =
-        &streams_[key_type(tf_gpu_id.value(), stream_group_within_gpu)];
+        &streams_[key_type(tf_device_id.value(), stream_group_within_gpu)];
    if (!group->compute) {
-      int priority = GetPriority(tf_gpu_id.value(), options);
+      int priority = GetPriority(tf_device_id.value(), options);
      group->priority = priority;
      group->compute = GetStream(executor, priority);
      group->compute->Init();
@@ -339,8 +340,8 @@ class BaseGPUDevice::StreamGroupFactory {
 private:
  // Returns priority for the given virtual GPU id from the session options.
  // Returns 0 if no virtual devices are specified.
-  int GetPriority(int tf_gpu_id, const GPUOptions& options) {
-    int id = tf_gpu_id;
+  int GetPriority(int tf_device_id, const GPUOptions& options) {
+    int id = tf_device_id;
    int i = 0;
    int priority = 0;
    while (i < options.experimental().virtual_devices_size()) {
@@ -378,7 +379,7 @@ class BaseGPUDevice::StreamGroupFactory {

 BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
                             Bytes memory_limit, const DeviceLocality& locality,
-                             TfGpuId tf_gpu_id,
+                             TfDeviceId tf_device_id,
                             const string& physical_device_desc,
                             Allocator* gpu_allocator, Allocator* cpu_allocator,
                             bool sync_every_op)
@@ -388,7 +389,7 @@ BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
      gpu_allocator_(gpu_allocator),
      cpu_allocator_(cpu_allocator),
      scoped_allocator_mgr_(new ScopedAllocatorMgr(name)),
-      tf_gpu_id_(tf_gpu_id),
+      tf_device_id_(tf_device_id),
      sync_every_op_(sync_every_op) {
  GPUProcessState::singleton()->EnableGPUDevice();
 }
@@ -410,7 +411,8 @@ Status BaseGPUDevice::InitScratchBuffers() {
        Allocator::kAllocatorAlignment, scratch_buffer_size);
    if (scratch_buffer == nullptr) {
      return errors::FailedPrecondition(
-          "Failed to allocate scratch buffer for device ", tf_gpu_id_.value());
+          "Failed to allocate scratch buffer for device ",
+          tf_device_id_.value());
    }
    se::DeviceMemory<char> mem(
        se::DeviceMemoryBase(scratch_buffer, scratch_buffer_size));
@@ -423,16 +425,16 @@ Status BaseGPUDevice::InitScratchBuffers() {

 Status BaseGPUDevice::Init(const SessionOptions& options) {
  auto executor_status = DeviceIdUtil::ExecutorForTfDeviceId(
-      DEVICE_GPU, GPUMachineManager(), tf_gpu_id_);
+      DEVICE_GPU, GPUMachineManager(), tf_device_id_);
  if (!executor_status.status().ok()) {
    return errors::Internal("Failed to get StreamExecutor for device ",
-                            tf_gpu_id_.value());
+                            tf_device_id_.value());
  }

  executor_ = executor_status.ValueOrDie();

  stream_ = StreamGroupFactory::Global().GetOrCreate(
-      tf_gpu_id_, 0, executor_, options.config.gpu_options());
+      tf_device_id_, 0, executor_, options.config.gpu_options());
  device_context_ =
      new GPUDeviceContext(0, stream_->compute,
 #if TENSORFLOW_USE_ROCM
@@ -461,7 +463,7 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
      // The GPUKernelTracker will use this SharedCounter, instead of
      // owning its own.
      timing_counter =
-          GPUProcessState::singleton()->GPUAllocatorCounter(tf_gpu_id_);
+          GPUProcessState::singleton()->GPUAllocatorCounter(tf_device_id_);
      DCHECK(timing_counter);
    }
    kernel_tracker_.reset(new GPUKernelTracker(
@@ -473,10 +475,10 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
  gpu_device_info_->stream = stream_->compute;
  gpu_device_info_->default_context = device_context_;
  gpu_device_info_->event_mgr = em_;
-  PlatformGpuId platform_gpu_id;
+  PlatformDeviceId platform_device_id;
  TF_RETURN_IF_ERROR(
-      GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id));
-  gpu_device_info_->gpu_id = platform_gpu_id.value();
+      GpuIdManager::TfToPlatformDeviceId(tf_device_id_, &platform_device_id));
+  gpu_device_info_->gpu_id = platform_device_id.value();
  set_tensorflow_gpu_device_info(gpu_device_info_);

  // Whether and how the GPU device uses its own threadpool.
@@ -505,7 +507,7 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
      // TODO(zhengxq): pin the thread to the same socket of the target GPU.
      thread_pool_.reset(new thread::ThreadPool(
          options.env, ThreadOptions(),
-          strings::StrCat("gpu_private_", tf_gpu_id_.value()),
+          strings::StrCat("gpu_private_", tf_device_id_.value()),
          static_cast<int32>(gpu_thread_count),
          !options.config.experimental().disable_thread_spinning(),
          /*allocator=*/nullptr));
@@ -531,8 +533,8 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
 string BaseGPUDevice::ComputeOpKernelDebugString(const OpKernel& op_kernel,
                                                 const int& stream_id) {
  return strings::StrCat(op_kernel.name(), " op ", op_kernel.type_string(),
-                         " on GPU ", tf_gpu_id_.value(), " stream[", stream_id,
-                         "]");
+                         " on GPU ", tf_device_id_.value(), " stream[",
+                         stream_id, "]");
 }

 void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
@@ -624,8 +626,8 @@ void BaseGPUDevice::ComputeAsync(AsyncOpKernel* op_kernel,
  const auto stream_id = gpu_device_context->stream_id();

  VLOG(1) << "GpuDevice::ComputeAsync " << op_kernel->name() << " op "
-          << op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
-          << stream_id << "]";
+          << op_kernel->type_string() << " on GPU" << tf_device_id_
+          << " stream[" << stream_id << "]";

  ScopedActivateExecutorContext scoped_activation{stream->parent()};
  op_kernel->ComputeAsync(context, std::move(done));
@@ -763,10 +765,10 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
  ConcretePerOpGpuDevice() : device_(&stream_device_) {}

  void Reinitialize(OpKernelContext* context, const gpuStream_t* gpu_stream,
-                    TfGpuId tf_gpu_id, Allocator* base_allocator,
+                    TfDeviceId tf_device_id, Allocator* base_allocator,
                    char* scratch) {
-    stream_device_.Reinitialize(context, gpu_stream, tf_gpu_id, base_allocator,
-                                scratch);
+    stream_device_.Reinitialize(context, gpu_stream, tf_device_id,
+                                base_allocator, scratch);
  }

  const Eigen::GpuDevice& device() const override { return device_; }
@@ -777,8 +779,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
 };

 // Parse 'visible_device_list' into a list of platform GPU ids.
-Status ParseVisibleDeviceList(const string& visible_device_list,
-                              std::vector<PlatformGpuId>* visible_gpu_order) {
+Status ParseVisibleDeviceList(
+    const string& visible_device_list,
+    std::vector<PlatformDeviceId>* visible_gpu_order) {
  visible_gpu_order->clear();
  se::Platform* gpu_manager = GPUMachineManager();

@@ -793,28 +796,28 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
  } else {
    const std::vector<string> order_str =
        str_util::Split(visible_device_list, ',');
-    for (const string& platform_gpu_id_str : order_str) {
-      int32 platform_gpu_id;
-      if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) {
+    for (const string& platform_device_id_str : order_str) {
+      int32 platform_device_id;
+      if (!strings::safe_strto32(platform_device_id_str, &platform_device_id)) {
        return errors::InvalidArgument(
            "Could not parse entry in 'visible_device_list': '",
-            platform_gpu_id_str,
+            platform_device_id_str,
            "'. visible_device_list = ", visible_device_list);
      }
-      if (platform_gpu_id < 0 ||
-          platform_gpu_id >= gpu_manager->VisibleDeviceCount()) {
+      if (platform_device_id < 0 ||
+          platform_device_id >= gpu_manager->VisibleDeviceCount()) {
        return errors::InvalidArgument(
-            "'visible_device_list' listed an invalid GPU id '", platform_gpu_id,
-            "' but visible device count is ",
+            "'visible_device_list' listed an invalid GPU id '",
+            platform_device_id, "' but visible device count is ",
            gpu_manager->VisibleDeviceCount());
      }
-      visible_gpu_order->push_back(PlatformGpuId(platform_gpu_id));
+      visible_gpu_order->push_back(PlatformDeviceId(platform_device_id));
    }
  }

  // Validate no repeats.
-  std::set<PlatformGpuId> visible_device_set(visible_gpu_order->begin(),
-                                             visible_gpu_order->end());
+  std::set<PlatformDeviceId> visible_device_set(visible_gpu_order->begin(),
+                                                visible_gpu_order->end());
  if (visible_device_set.size() != visible_gpu_order->size()) {
    return errors::InvalidArgument(
        "visible_device_list contained a duplicate entry: ",
@ -825,8 +828,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list,

 Status VerifyVirtualDeviceSettings(
    const size_t num_gpus_to_use, const GPUOptions& gpu_options,
-    const std::vector<PlatformGpuId>& visible_gpu_order,
-    const std::vector<PlatformGpuId>& valid_platform_gpu_ids,
+    const std::vector<PlatformDeviceId>& visible_gpu_order,
+    const std::vector<PlatformDeviceId>& valid_platform_device_ids,
    const std::map<int, std::pair<int, int>>& supported_priority_ranges) {
  const auto& virtual_devices = gpu_options.experimental().virtual_devices();
  CHECK(!virtual_devices.empty());
@ -849,11 +852,11 @@ Status VerifyVirtualDeviceSettings(
        " #GPUs in visible_device_list: ", visible_gpu_order.size(),
        " virtual_devices.size(): ", virtual_devices.size());
  }
-  if (valid_platform_gpu_ids.size() != virtual_devices.size()) {
+  if (valid_platform_device_ids.size() != virtual_devices.size()) {
    return errors::Unknown(
        "The number of valid GPUs doesn't match the number of elements in "
        "the virtual_devices list.",
-        " #valid GPUs: ", valid_platform_gpu_ids.size(),
+        " #valid GPUs: ", valid_platform_device_ids.size(),
        " virtual_devices.size(): ", virtual_devices.size());
  }
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
@ -882,7 +885,7 @@ Status VerifyVirtualDeviceSettings(
          i, " memory_limit_mb size: ", memory_limit_mb.size(),
          " and priority size: ", priority.size());
    }
-    const int gpu_id = valid_platform_gpu_ids[i].value();
+    const int gpu_id = valid_platform_device_ids[i].value();
    auto it = supported_priority_ranges.find(gpu_id);
    if (it == supported_priority_ranges.end()) {
      return errors::Internal(
@ -950,19 +953,19 @@ int64 MinSystemMemory(int64 available_memory, int cc_major) {
 }

 // Get the memory limit for the virtual device being created on GPU with
-// 'platform_gpu_id', when that virtual device is the only virtual device being
-// created on that GPU.
+// 'platform_device_id', when that virtual device is the only virtual device
+// being created on that GPU.
 Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options,
-                                      PlatformGpuId platform_gpu_id,
+                                      PlatformDeviceId platform_device_id,
                                      int64* memory_limit) {
  int64 total_memory = 0;
  int64 available_memory = 0;
  se::StreamExecutor* se = DeviceIdUtil::ExecutorForPlatformDeviceId(
-                               GPUMachineManager(), platform_gpu_id)
+                               GPUMachineManager(), platform_device_id)
                               .ValueOrDie();
  if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) {
    return errors::Unknown("Failed to query available memory for GPU ",
-                           platform_gpu_id.value());
+                           platform_device_id.value());
  }

  int64 allocated_memory = 0;
@ -1037,7 +1040,7 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
  DCHECK_EQ(stream_id, 0);
  const gpuStream_t* gpu_stream = reinterpret_cast<const gpuStream_t*>(
      stream_->compute->implementation()->GpuStreamMemberHack());
-  concrete_device->Reinitialize(context, gpu_stream, tf_gpu_id_, allocator,
+  concrete_device->Reinitialize(context, gpu_stream, tf_device_id_, allocator,
                                scratch_);
 }

@ -1093,7 +1096,7 @@ Status BaseGPUDeviceFactory::CacheDeviceIds() {
    return Status::OK();
  }

-  std::vector<PlatformGpuId> visible_gpu_order(device_count);
+  std::vector<PlatformDeviceId> visible_gpu_order(device_count);
  std::iota(visible_gpu_order.begin(), visible_gpu_order.end(), 0);
  TF_RETURN_IF_ERROR(GetValidDeviceIds(visible_gpu_order, &cached_device_ids_));
  return Status::OK();
@ -1101,9 +1104,9 @@ Status BaseGPUDeviceFactory::CacheDeviceIds() {

 Status BaseGPUDeviceFactory::ListPhysicalDevices(std::vector<string>* devices) {
  TF_RETURN_IF_ERROR(CacheDeviceIds());
-  for (PlatformGpuId platform_gpu_id : cached_device_ids_) {
+  for (PlatformDeviceId platform_device_id : cached_device_ids_) {
    const string device_name =
-        strings::StrCat("/physical_device:GPU:", platform_gpu_id.value());
+        strings::StrCat("/physical_device:GPU:", platform_device_id.value());
    devices->push_back(device_name);
  }

@ -1117,14 +1120,15 @@ Status BaseGPUDeviceFactory::GetDeviceDetails(
  if (device_index < 0 || device_index > cached_device_ids_.size()) {
    return errors::Internal("Invalid device index: ", device_index);
  }
-  PlatformGpuId platform_gpu_id = cached_device_ids_[device_index];
+  PlatformDeviceId platform_device_id = cached_device_ids_[device_index];

  TF_RETURN_IF_ERROR(ValidateGPUMachineManager());
  se::Platform* gpu_manager = GPUMachineManager();
  if (gpu_manager == nullptr) {
    return errors::Internal("Cannot get GPUMachineManager");
  }
-  auto desc_status = gpu_manager->DescriptionForDevice(platform_gpu_id.value());
+  auto desc_status =
+      gpu_manager->DescriptionForDevice(platform_device_id.value());
  if (!desc_status.ok()) {
    return desc_status.status();
  }
@ -1159,8 +1163,8 @@ Status BaseGPUDeviceFactory::CreateDevices(
    num_gpus_to_use = iter->second;
  }
  const auto& gpu_options = options.config.gpu_options();
-  std::vector<PlatformGpuId> visible_gpu_order;
-  std::vector<PlatformGpuId> valid_platform_gpu_ids;
+  std::vector<PlatformDeviceId> visible_gpu_order;
+  std::vector<PlatformDeviceId> valid_platform_device_ids;
  // If we aren't going to use any GPUs, don't initialize them.
  // We don't want to call ParseVisibleDeviceList if num_gpus_to_use is 0,
  // because it treats an empty gpu_options.visible_device_list as 'all GPUs
@ -1188,13 +1192,13 @@ Status BaseGPUDeviceFactory::CreateDevices(
    }

    TF_RETURN_IF_ERROR(
-        GetValidDeviceIds(visible_gpu_order, &valid_platform_gpu_ids));
+        GetValidDeviceIds(visible_gpu_order, &valid_platform_device_ids));
  }
-  if (num_gpus_to_use > valid_platform_gpu_ids.size()) {
-    num_gpus_to_use = valid_platform_gpu_ids.size();
+  if (num_gpus_to_use > valid_platform_device_ids.size()) {
+    num_gpus_to_use = valid_platform_device_ids.size();
  }
  std::map<int, std::pair<int, int>> supported_priority_ranges;
-  if (!valid_platform_gpu_ids.empty()) {
+  if (!valid_platform_device_ids.empty()) {
    // Save the original device.
    int original_device = 0;
 #if GOOGLE_CUDA
@ -1213,18 +1217,18 @@ Status BaseGPUDeviceFactory::CreateDevices(

    // Force to implicitly initialize CUDA runtime on each valid GPU before
    // CreateGPUDevice().
-    for (PlatformGpuId platform_gpu_id : valid_platform_gpu_ids) {
+    for (PlatformDeviceId platform_device_id : valid_platform_device_ids) {
 #if GOOGLE_CUDA
-      err = cudaSetDevice(platform_gpu_id.value());
+      err = cudaSetDevice(platform_device_id.value());
      if (err != cudaSuccess) {
        return errors::Internal(
-            "cudaSetDevice() on GPU:", platform_gpu_id.value(),
+            "cudaSetDevice() on GPU:", platform_device_id.value(),
            " failed. Status: ", cudaGetErrorString(err));
      }
      err = cudaFree(nullptr);
      if (err != cudaSuccess) {
        return errors::Internal("CUDA runtime implicit initialization on GPU:",
-                                platform_gpu_id.value(),
+                                platform_device_id.value(),
                                " failed. Status: ", cudaGetErrorString(err));
      }
      int priority_low, priority_high;
@ -1237,19 +1241,19 @@ Status BaseGPUDeviceFactory::CreateDevices(
      VLOG(1) << "Cuda stream priority range on GPU(" << original_device
              << "): " << priority_high << "," << priority_low;
      supported_priority_ranges.insert(
-          std::make_pair(platform_gpu_id.value(),
+          std::make_pair(platform_device_id.value(),
                         std::make_pair(priority_low, priority_high)));
 #elif TENSORFLOW_USE_ROCM
-      err = hipSetDevice(platform_gpu_id.value());
+      err = hipSetDevice(platform_device_id.value());
      if (err != hipSuccess) {
        return errors::Internal(
-            "hipSetDevice() on GPU:", platform_gpu_id.value(),
+            "hipSetDevice() on GPU:", platform_device_id.value(),
            " failed. Status: ", hipGetErrorString(err));
      }
      err = hipFree(nullptr);
      if (err != hipSuccess) {
        return errors::Internal("ROCm runtime implicit initialization on GPU:",
-                                platform_gpu_id.value(),
+                                platform_device_id.value(),
                                " failed. Status: ", hipGetErrorString(err));
      }
      int priority_low, priority_high;
@ -1262,7 +1266,7 @@ Status BaseGPUDeviceFactory::CreateDevices(
      VLOG(1) << "HIP stream priority range on GPU(" << original_device
              << "): " << priority_high << "," << priority_low;
      supported_priority_ranges.insert(
-          std::make_pair(platform_gpu_id.value(),
+          std::make_pair(platform_device_id.value(),
                         std::make_pair(priority_low, priority_high)));
 #endif
    }
@ -1306,9 +1310,9 @@ Status BaseGPUDeviceFactory::CreateDevices(
    LOG(INFO) << line_buf;
    for (int i = 0; i < visible_gpu_order.size(); ++i) {
      line_buf = strings::StrCat(visible_gpu_order[i].value(), ":   ");
-      PlatformGpuId gpu_id_i = visible_gpu_order[i];
+      PlatformDeviceId gpu_id_i = visible_gpu_order[i];
      for (int j = 0; j < visible_gpu_order.size(); ++j) {
-        PlatformGpuId gpu_id_j = visible_gpu_order[j];
+        PlatformDeviceId gpu_id_j = visible_gpu_order[j];
        if (im.directed_links.find({gpu_id_i, gpu_id_j}) !=
            im.directed_links.end()) {
          line_buf.append("Y ");
@ -1323,22 +1327,23 @@ Status BaseGPUDeviceFactory::CreateDevices(
  const auto& virtual_devices = gpu_options.experimental().virtual_devices();
  if (!virtual_devices.empty()) {
    TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(
-        num_gpus_to_use, gpu_options, visible_gpu_order, valid_platform_gpu_ids,
-        supported_priority_ranges));
+        num_gpus_to_use, gpu_options, visible_gpu_order,
+        valid_platform_device_ids, supported_priority_ranges));
    // We've verified that num_gpus_to_use >= virtual_devices.size().
    num_gpus_to_use = virtual_devices.size();
    CHECK(gpu_options.visible_device_list().empty() ||
-          valid_platform_gpu_ids == visible_gpu_order);
+          valid_platform_device_ids == visible_gpu_order);
  }
-  int next_tf_gpu_id = 0;
+  int next_tf_device_id = 0;
  std::vector<int64> memory_limit_bytes;
  for (int i = 0; i < num_gpus_to_use; ++i) {
-    const PlatformGpuId platform_gpu_id = valid_platform_gpu_ids[i];
+    const PlatformDeviceId platform_device_id = valid_platform_device_ids[i];
    if (virtual_devices.empty() ||
        virtual_devices.Get(i).memory_limit_mb_size() == 0) {
      int64 single_virtual_device_memory_limit = 0;
-      TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit(
-          gpu_options, platform_gpu_id, &single_virtual_device_memory_limit));
+      TF_RETURN_IF_ERROR(
+          SingleVirtualDeviceMemoryLimit(gpu_options, platform_device_id,
+                                         &single_virtual_device_memory_limit));
      memory_limit_bytes.push_back(single_virtual_device_memory_limit);
    } else {
      const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb();
@ -1347,36 +1352,37 @@ Status BaseGPUDeviceFactory::CreateDevices(
                       return static_cast<int64>(mb) * (1ll << 20);
                     });
    }
-    while (next_tf_gpu_id < memory_limit_bytes.size()) {
-      TfGpuId tf_gpu_id(next_tf_gpu_id);
-      ++next_tf_gpu_id;
-      TF_RETURN_IF_ERROR(
-          GpuIdManager::InsertTfPlatformGpuIdPair(tf_gpu_id, platform_gpu_id));
+    while (next_tf_device_id < memory_limit_bytes.size()) {
+      TfDeviceId tf_device_id(next_tf_device_id);
+      ++next_tf_device_id;
+      TF_RETURN_IF_ERROR(GpuIdManager::InsertTfPlatformDeviceIdPair(
+          tf_device_id, platform_device_id));
    }
  }
-  const int num_tf_gpus = next_tf_gpu_id;
+  const int num_tf_gpus = next_tf_device_id;

  LocalityMap device_localities;
  TF_RETURN_IF_ERROR(
      GetDeviceLocalities(num_tf_gpus, interconnect_maps, &device_localities));

  // Build the GPUDevices
-  CHECK_EQ(next_tf_gpu_id, memory_limit_bytes.size());
+  CHECK_EQ(next_tf_device_id, memory_limit_bytes.size());
  for (int di = 0; di < num_tf_gpus; ++di) {
-    TfGpuId tf_gpu_id(di);
+    TfDeviceId tf_device_id(di);
    int64 bytes = memory_limit_bytes[di];
-    auto it = device_localities.find(tf_gpu_id);
+    auto it = device_localities.find(tf_device_id);
    if (it == device_localities.end()) {
      return errors::Internal("Failed to find DeviceLocality for GPU device ",
-                              tf_gpu_id.value());
+                              tf_device_id.value());
    }
-    TF_RETURN_IF_ERROR(CreateGPUDevice(options, name_prefix, tf_gpu_id, bytes,
-                                       it->second, num_tf_gpus, devices));
+    TF_RETURN_IF_ERROR(CreateGPUDevice(options, name_prefix, tf_device_id,
+                                       bytes, it->second, num_tf_gpus,
+                                       devices));
  }
  return Status::OK();
 }

-static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id,
+static string GetShortDeviceDescription(PlatformDeviceId platform_device_id,
                                        const se::DeviceDescription& desc) {
 #if GOOGLE_CUDA
  int cc_major;
@ -1386,54 +1392,56 @@ static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id,
    cc_minor = 0;
  }
  // LINT.IfChange
-  return strings::StrCat("device: ", platform_gpu_id.value(),
+  return strings::StrCat("device: ", platform_device_id.value(),
                         ", name: ", desc.name(),
                         ", pci bus id: ", desc.pci_bus_id(),
                         ", compute capability: ", cc_major, ".", cc_minor);
  // LINT.ThenChange(//tensorflow/python/framework/gpu_util.py)
 #elif TENSORFLOW_USE_ROCM
-  return strings::StrCat("device: ", platform_gpu_id.value(),
+  return strings::StrCat("device: ", platform_device_id.value(),
                         ", name: ", desc.name(),
                         ", pci bus id: ", desc.pci_bus_id());
 #endif
 }

 Status BaseGPUDeviceFactory::CreateGPUDevice(
-    const SessionOptions& options, const string& name_prefix, TfGpuId tf_gpu_id,
-    int64 memory_limit, const DeviceLocality& dev_locality, size_t num_tf_gpus,
+    const SessionOptions& options, const string& name_prefix,
+    TfDeviceId tf_device_id, int64 memory_limit,
+    const DeviceLocality& dev_locality, size_t num_tf_gpus,
    std::vector<std::unique_ptr<Device>>* devices) {
-  CHECK_GE(tf_gpu_id.value(), 0);
+  CHECK_GE(tf_device_id.value(), 0);
  const string device_name =
-      strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value());
+      strings::StrCat(name_prefix, "/device:GPU:", tf_device_id.value());
  DeviceIdUtil::CheckValidTfDeviceId(DEVICE_GPU, GPUMachineManager(),
-                                     tf_gpu_id);
-  PlatformGpuId platform_gpu_id;
+                                     tf_device_id);
+  PlatformDeviceId platform_device_id;
  TF_RETURN_IF_ERROR(
-      GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
+      GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id));
  int numa_node = dev_locality.numa_node();

  se::Platform* gpu_manager = GPUMachineManager();
-  auto desc_status = gpu_manager->DescriptionForDevice(platform_gpu_id.value());
+  auto desc_status =
+      gpu_manager->DescriptionForDevice(platform_device_id.value());
  if (!desc_status.ok()) {
    return desc_status.status();
  }
  auto desc = desc_status.ConsumeValueOrDie();

-  std::vector<TfGpuId> peer_gpu_ids;
+  std::vector<TfDeviceId> peer_gpu_ids;
  peer_gpu_ids.reserve(num_tf_gpus);
  for (int id = 0; id < num_tf_gpus; ++id) {
-    TfGpuId peer_tf_gpu_id(id);
-    if (peer_tf_gpu_id != tf_gpu_id) {
-      peer_gpu_ids.push_back(peer_tf_gpu_id);
+    TfDeviceId peer_tf_device_id(id);
+    if (peer_tf_device_id != tf_device_id) {
+      peer_gpu_ids.push_back(peer_tf_device_id);
    }
  }

  GPUProcessState* process_state = GPUProcessState::singleton();
  Allocator* gpu_allocator = process_state->GetGPUAllocator(
-      options.config.gpu_options(), tf_gpu_id, memory_limit, peer_gpu_ids);
+      options.config.gpu_options(), tf_device_id, memory_limit, peer_gpu_ids);
  if (gpu_allocator == nullptr) {
    return errors::Internal("Failed to get memory allocator for TF GPU ",
-                            tf_gpu_id.value(), " with ", memory_limit,
+                            tf_device_id.value(), " with ", memory_limit,
                            " bytes of memory.");
  }
  absl::optional<AllocatorStats> stats = gpu_allocator->GetStats();
@ -1441,7 +1449,7 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(
    return errors::Internal("No allocator statistics");
  }
  // 'memory_limit' is the required memory size, but if the allocator with
-  // given tf_gpu_id was created before, we'll use it instead of creating a
+  // given tf_device_id was created before, we'll use it instead of creating a
  // new one (as TF gpu device is a shared resource), in which case the actual
  // memory limit represented by 'stats.bytes_limit' used by that allocator
  // may be different (which should be an error).
@ -1451,11 +1459,11 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(
  int64 bytes_limit = stats->bytes_limit ? *stats->bytes_limit : 0;
  std::unique_ptr<BaseGPUDevice> gpu_device = CreateGPUDevice(
      options, device_name, static_cast<Bytes>(bytes_limit), dev_locality,
-      tf_gpu_id, GetShortDeviceDescription(platform_gpu_id, *desc),
+      tf_device_id, GetShortDeviceDescription(platform_device_id, *desc),
      gpu_allocator, ProcessState::singleton()->GetCPUAllocator(numa_node));
  LOG(INFO) << "Created TensorFlow device (" << device_name << " with "
            << (bytes_limit >> 20) << " MB memory) -> physical GPU ("
-            << GetShortDeviceDescription(platform_gpu_id, *desc) << ")";
+            << GetShortDeviceDescription(platform_device_id, *desc) << ")";
  TF_RETURN_IF_ERROR(gpu_device->Init(options));
  devices->push_back(std::move(gpu_device));

@ -1463,13 +1471,13 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(
 }

 namespace {
-std::unique_ptr<std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>>
+std::unique_ptr<std::map<std::pair<PlatformDeviceId, PlatformDeviceId>, bool>>
 GetPeerAccessMap(se::Platform* platform,
-                 const std::vector<PlatformGpuId>& visible_gpu_order) {
-  std::unique_ptr<std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>> map(
-      new std::map<std::pair<PlatformGpuId, PlatformGpuId>, bool>);
-  for (PlatformGpuId platform_gpu_i : visible_gpu_order) {
-    for (PlatformGpuId platform_gpu_j : visible_gpu_order) {
+                 const std::vector<PlatformDeviceId>& visible_gpu_order) {
+  std::unique_ptr<std::map<std::pair<PlatformDeviceId, PlatformDeviceId>, bool>>
+      map(new std::map<std::pair<PlatformDeviceId, PlatformDeviceId>, bool>);
+  for (PlatformDeviceId platform_gpu_i : visible_gpu_order) {
+    for (PlatformDeviceId platform_gpu_j : visible_gpu_order) {
      se::StreamExecutor* from =
          DeviceIdUtil::ExecutorForPlatformDeviceId(platform, platform_gpu_i)
              .ValueOrDie();
@ -1487,7 +1495,7 @@ GetPeerAccessMap(se::Platform* platform,
 }  // namespace

 Status BaseGPUDeviceFactory::GetInterconnectMaps(
-    const std::vector<PlatformGpuId>& visible_gpu_order,
+    const std::vector<PlatformDeviceId>& visible_gpu_order,
    se::Platform* gpu_manager, std::vector<InterconnectMap>* maps) {
  // The default interconnect map is obtained from the StreamExecutor.
  auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order);
@ -1495,8 +1503,8 @@ Status BaseGPUDeviceFactory::GetInterconnectMaps(
  InterconnectMap& imap = maps->at(0);
  imap.name = "StreamExecutor";
  imap.strength = InterconnectMap::kStreamExecutorStrength;
-  for (PlatformGpuId gpu_id_i : visible_gpu_order) {
-    for (PlatformGpuId gpu_id_j : visible_gpu_order) {
+  for (PlatformDeviceId gpu_id_i : visible_gpu_order) {
+    for (PlatformDeviceId gpu_id_j : visible_gpu_order) {
      if (gpu_id_i == gpu_id_j) continue;
      if ((*access_map)[{gpu_id_i, gpu_id_j}]) {
        imap.directed_links.insert({gpu_id_i, gpu_id_j});
@ -1509,21 +1517,21 @@ Status BaseGPUDeviceFactory::GetInterconnectMaps(
 Status BaseGPUDeviceFactory::GetDeviceLocalities(
    int num_tf_gpus, const std::vector<InterconnectMap>& interconnects,
    LocalityMap* localities) {
-  std::vector<TfGpuId> all_tf_gpu_ids;
-  all_tf_gpu_ids.reserve(num_tf_gpus);
+  std::vector<TfDeviceId> all_tf_device_ids;
+  all_tf_device_ids.reserve(num_tf_gpus);
  for (int i = 0; i < num_tf_gpus; ++i) {
-    all_tf_gpu_ids.push_back(TfGpuId(i));
+    all_tf_device_ids.push_back(TfDeviceId(i));
  }
-  for (TfGpuId tf_gpu_id : all_tf_gpu_ids) {
-    PlatformGpuId platform_gpu_id;
+  for (TfDeviceId tf_device_id : all_tf_device_ids) {
+    PlatformDeviceId platform_device_id;
    TF_RETURN_IF_ERROR(
-        GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
+        GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id));
    // Get GPU bus_id from its reported NUMA affinity.  Because GPUs are
    // virtualized in some environments, we can't just use the GPU id.
    // NUMA locales are indexed from 0, buses are indexed from 1.
    se::Platform* gpu_manager = GPUMachineManager();
    auto desc_status =
-        gpu_manager->DescriptionForDevice(platform_gpu_id.value());
+        gpu_manager->DescriptionForDevice(platform_device_id.value());
    if (!desc_status.ok()) {
      return desc_status.status();
    }
@ -1537,7 +1545,7 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
      // trouble may manifest as slower than expected performance, or
      // outright failures.
      LOG(INFO) << "Could not identify NUMA node of platform GPU id "
-                << platform_gpu_id
+                << platform_device_id
                << ", defaulting to 0.  Your kernel may not have been built "
                << "with NUMA support.";
      numa_node = 0;
@ -1549,11 +1557,11 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
    // Set LocalLinks from InterconnectMaps.
    LocalLinks* links = dev_locality.mutable_links();
    for (const InterconnectMap& imap : interconnects) {
-      for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) {
-        PlatformGpuId platform_gpu_dst;
+      for (TfDeviceId tf_gpu_dst : all_tf_device_ids) {
+        PlatformDeviceId platform_gpu_dst;
        TF_RETURN_IF_ERROR(
-            GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst));
-        if (imap.directed_links.find({platform_gpu_id, platform_gpu_dst}) !=
+            GpuIdManager::TfToPlatformDeviceId(tf_gpu_dst, &platform_gpu_dst));
+        if (imap.directed_links.find({platform_device_id, platform_gpu_dst}) !=
            imap.directed_links.end()) {
          InterconnectLink* ilink = links->add_link();
          ilink->set_device_id(tf_gpu_dst.value());
@ -1565,12 +1573,12 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(

    // If this is one of multiple virtual GPUs on the same physical GPU
    // add high strength links to the others.
-    for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) {
-      if (tf_gpu_id == tf_gpu_dst) continue;
-      PlatformGpuId platform_gpu_dst;
+    for (TfDeviceId tf_gpu_dst : all_tf_device_ids) {
+      if (tf_device_id == tf_gpu_dst) continue;
+      PlatformDeviceId platform_gpu_dst;
      TF_RETURN_IF_ERROR(
-          GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst));
-      if (platform_gpu_id == platform_gpu_dst) {
+          GpuIdManager::TfToPlatformDeviceId(tf_gpu_dst, &platform_gpu_dst));
+      if (platform_device_id == platform_gpu_dst) {
        InterconnectLink* ilink = links->add_link();
        ilink->set_device_id(tf_gpu_dst.value());
        ilink->set_type("SAME_DEVICE");
@ -1578,10 +1586,11 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(
      }
    }

-    (*localities)[tf_gpu_id] = dev_locality;
-    VLOG(1) << "GPUDevice PlatformGpuId " << platform_gpu_id << " TfGpuId "
-            << tf_gpu_id << " on bus " << dev_locality.bus_id()
-            << " numa: " << numa_node << " pci: " << desc->pci_bus_id()
+    (*localities)[tf_device_id] = dev_locality;
+    VLOG(1) << "GPUDevice PlatformDeviceId " << platform_device_id
+            << " TfDeviceId " << tf_device_id << " on bus "
+            << dev_locality.bus_id() << " numa: " << numa_node
+            << " pci: " << desc->pci_bus_id()
            << " DeviceLocality: " << dev_locality.DebugString();
  }
  return Status::OK();
@ -1589,7 +1598,7 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities(

 static int GetDefaultMinGPUMultiprocessorCount(
    se::Platform* gpu_manager,
-    const std::vector<PlatformGpuId>& visible_gpu_order) {
+    const std::vector<PlatformDeviceId>& visible_gpu_order) {
  static const int kDefaultMinGPUMultiprocessorCount = 8;

  // Find the highest multi-processor count across all visible GPUs.
@ -1614,7 +1623,7 @@ static int GetDefaultMinGPUMultiprocessorCount(

 static int GetMinGPUMultiprocessorCount(
    se::Platform* gpu_manager,
-    const std::vector<PlatformGpuId>& visible_gpu_order) {
+    const std::vector<PlatformDeviceId>& visible_gpu_order) {
  const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT");

  if (tf_min_gpu_core_count == nullptr ||
@ -1704,14 +1713,14 @@ std::vector<int> GetSupportedAMDGPUISAVersions() {
 }  // namespace

 Status BaseGPUDeviceFactory::EnablePeerAccess(
-    const std::vector<PlatformGpuId>& visible_gpu_order) {
+    const std::vector<PlatformDeviceId>& visible_gpu_order) {
  se::Platform* gpu_manager = GPUMachineManager();
  int possible_peer_count = 0;
  int enabled_peer_count = 0;
  for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const PlatformGpuId platform_gpu_i = visible_gpu_order[i];
+    const PlatformDeviceId platform_gpu_i = visible_gpu_order[i];
    for (int j = 0; j < visible_gpu_order.size(); ++j) {
-      const PlatformGpuId platform_gpu_j = visible_gpu_order[j];
+      const PlatformDeviceId platform_gpu_j = visible_gpu_order[j];
      // We have already validated that ExecutorForDevice() calls return OK.
      se::StreamExecutor* from =
          DeviceIdUtil::ExecutorForPlatformDeviceId(gpu_manager, platform_gpu_i)
@ -1748,8 +1757,8 @@ Status BaseGPUDeviceFactory::EnablePeerAccess(
 }

 Status BaseGPUDeviceFactory::GetValidDeviceIds(
-    const std::vector<PlatformGpuId>& visible_gpu_order,
-    std::vector<PlatformGpuId>* ids) {
+    const std::vector<PlatformDeviceId>& visible_gpu_order,
+    std::vector<PlatformDeviceId>* ids) {
  se::Platform* gpu_manager = GPUMachineManager();
  for (int i = 0; i < visible_gpu_order.size(); ++i) {
    int visible_gpu_id = visible_gpu_order[i].value();
@ -1834,7 +1843,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(

  // Filter out devices that don't have the right capability or power.
  for (int i = 0; i < visible_gpu_order.size(); ++i) {
-    const PlatformGpuId visible_gpu_id = visible_gpu_order[i];
+    const PlatformDeviceId visible_gpu_id = visible_gpu_order[i];
    auto description_status =
        gpu_manager->DescriptionForDevice(visible_gpu_id.value());
    if (!description_status.ok()) {
@ -1904,7 +1913,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
  if (!ids->empty()) {
    std::vector<int> raw_ids(ids->size());
    std::transform(ids->begin(), ids->end(), raw_ids.begin(),
-                   [](PlatformGpuId id) -> int { return id.value(); });
+                   [](PlatformDeviceId id) -> int { return id.value(); });
    LOG(INFO) << "Adding visible gpu devices: " << absl::StrJoin(raw_ids, ", ");
  }

--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@ -53,7 +53,8 @@ class BaseGPUDevice : public LocalDevice {
 public:
  BaseGPUDevice(const SessionOptions& options, const std::string& name,
                Bytes memory_limit, const DeviceLocality& locality,
-                TfGpuId tf_gpu_id, const std::string& physical_device_desc,
+                TfDeviceId tf_device_id,
+                const std::string& physical_device_desc,
                Allocator* gpu_allocator, Allocator* cpu_allocator,
                bool sync_every_op);

@ -87,9 +88,10 @@ class BaseGPUDevice : public LocalDevice {
  // Returns the platform GPU id of this device within the native driver system;
  // e.g., for CUDA and ROCm this is the ordinal of the GPU within the system.
  int gpu_id() const {
-    PlatformGpuId platform_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id));
-    return platform_gpu_id.value();
+    PlatformDeviceId platform_device_id;
+    TF_CHECK_OK(
+        GpuIdManager::TfToPlatformDeviceId(tf_device_id_, &platform_device_id));
+    return platform_device_id.value();
  }

  // The executor that provides control for the device; e.g., for CUDA this
@ -146,7 +148,7 @@ class BaseGPUDevice : public LocalDevice {
  GPUDeviceContext* device_context_;
  GpuDeviceInfo* gpu_device_info_ = nullptr;
  mutex trace_mu_;
-  TfGpuId tf_gpu_id_;
+  TfDeviceId tf_device_id_;
  const bool sync_every_op_ = false;
  EventMgr* em_ = nullptr;
  std::unique_ptr<thread::ThreadPool> thread_pool_;
@ -325,53 +327,56 @@ class BaseGPUDeviceFactory : public DeviceFactory {
    int32 strength;
    static const int kSameDeviceStrength;
    static const int kStreamExecutorStrength;
-    std::set<std::pair<PlatformGpuId, PlatformGpuId>> directed_links;
+    std::set<std::pair<PlatformDeviceId, PlatformDeviceId>> directed_links;
  };

 protected:
  // Populates *maps with interconnect maps for all local direct access
  // pathways between GPUs.
  virtual Status GetInterconnectMaps(
-      const std::vector<PlatformGpuId>& visible_gpu_order,
+      const std::vector<PlatformDeviceId>& visible_gpu_order,
      se::Platform* gpu_manager, std::vector<InterconnectMap>* maps);

-  struct TfGpuIdHash {
-    std::size_t operator()(const TfGpuId& id) const noexcept {
+  struct TfDeviceIdHash {
+    std::size_t operator()(const TfDeviceId& id) const noexcept {
      return std::hash<int>{}(id.value());
    }
  };
-  typedef std::unordered_map<TfGpuId, DeviceLocality, TfGpuIdHash> LocalityMap;
+  typedef std::unordered_map<TfDeviceId, DeviceLocality, TfDeviceIdHash>
+      LocalityMap;
  // Populates *localities with the DeviceLocality descriptor for
-  // every TfGpuId.
+  // every TfDeviceId.
  virtual Status GetDeviceLocalities(
      int num_tf_gpus, const std::vector<InterconnectMap>& interconnects,
      LocalityMap* localities);

 private:
-  // Creates a BaseGPUDevice associated with 'tf_gpu_id', allocates (strictly)
-  // 'memory_limit' bytes of GPU memory to it, and adds it to the 'devices'
-  // vector.
+  // Creates a BaseGPUDevice associated with 'tf_device_id', allocates
+  // (strictly) 'memory_limit' bytes of GPU memory to it, and adds it to the
+  // 'devices' vector.
  Status CreateGPUDevice(const SessionOptions& options,
-                         const std::string& name_prefix, TfGpuId tf_gpu_id,
-                         int64 memory_limit, const DeviceLocality& dev_locality,
-                         size_t num_tf_gpus,
+                         const std::string& name_prefix,
+                         TfDeviceId tf_device_id, int64 memory_limit,
+                         const DeviceLocality& dev_locality, size_t num_tf_gpus,
                         std::vector<std::unique_ptr<Device>>* devices);

  virtual std::unique_ptr<BaseGPUDevice> CreateGPUDevice(
      const SessionOptions& options, const string& name, Bytes memory_limit,
-      const DeviceLocality& dev_locality, TfGpuId tf_gpu_id,
+      const DeviceLocality& dev_locality, TfDeviceId tf_device_id,
      const string& physical_device_desc, Allocator* gpu_allocator,
      Allocator* cpu_allocator) = 0;

-  Status EnablePeerAccess(const std::vector<PlatformGpuId>& visible_gpu_order);
+  Status EnablePeerAccess(
+      const std::vector<PlatformDeviceId>& visible_gpu_order);

  // Returns into 'ids' the list of valid platform GPU ids, in the order that
  // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc,
  // based upon 'visible_gpu_order' which was generated by parsing
  // GPUOptions::visible_device_list which is a comma-separated list of CUDA or
  // ROCm GPU ids.
-  Status GetValidDeviceIds(const std::vector<PlatformGpuId>& visible_gpu_order,
-                           std::vector<PlatformGpuId>* ids);
+  Status GetValidDeviceIds(
+      const std::vector<PlatformDeviceId>& visible_gpu_order,
+      std::vector<PlatformDeviceId>* ids);

  // Cache the valid device IDs if not already cached. Cached IDs are stored in
  // field cached_device_ids_. Passes {0, 1, ..., num_devices-1} to
@ -379,14 +384,14 @@ class BaseGPUDeviceFactory : public DeviceFactory {
  // devices should be treated as visible, like ListPhysicalDevices.
  Status CacheDeviceIds();

-  // visible_gpu_initialized_[platform_gpu_id] is true if visible GPU
-  // platform_gpu_id has been initialized by the process.
+  // visible_gpu_initialized_[platform_device_id] is true if visible GPU
+  // platform_device_id has been initialized by the process.
  std::unordered_map<int, bool> visible_gpu_initialized_;

  // Cached device IDs, as returned by GetValidDeviceIds when every physical
  // device is visible. Cache should not be used if some devices are not
  // visible.
-  std::vector<PlatformGpuId> cached_device_ids_;
+  std::vector<PlatformDeviceId> cached_device_ids_;
 };

 }  // namespace tensorflow
--- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
@ -30,9 +30,9 @@ class GPUDevice : public BaseGPUDevice {
 public:
  GPUDevice(const SessionOptions& options, const string& name,
            Bytes memory_limit, const DeviceLocality& locality,
-            TfGpuId tf_gpu_id, const string& physical_device_desc,
+            TfDeviceId tf_device_id, const string& physical_device_desc,
            Allocator* gpu_allocator, Allocator* cpu_allocator)
-      : BaseGPUDevice(options, name, memory_limit, locality, tf_gpu_id,
+      : BaseGPUDevice(options, name, memory_limit, locality, tf_device_id,
                      physical_device_desc, gpu_allocator, cpu_allocator,
                      false /* sync every op */) {
    if (options.config.has_gpu_options()) {
@ -63,11 +63,11 @@ class GPUDeviceFactory : public BaseGPUDeviceFactory {
 private:
  std::unique_ptr<BaseGPUDevice> CreateGPUDevice(
      const SessionOptions& options, const string& name, Bytes memory_limit,
-      const DeviceLocality& locality, TfGpuId tf_gpu_id,
+      const DeviceLocality& locality, TfDeviceId tf_device_id,
      const string& physical_device_desc, Allocator* gpu_allocator,
      Allocator* cpu_allocator) override {
    return absl::make_unique<GPUDevice>(options, name, memory_limit, locality,
-                                        tf_gpu_id, physical_device_desc,
+                                        tf_device_id, physical_device_desc,
                                        gpu_allocator, cpu_allocator);
  }
 };
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@ -30,7 +30,7 @@ namespace tensorflow {
 namespace {
 const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0";

-int64 GetTotalGPUMemory(PlatformGpuId gpu_id) {
+int64 GetTotalGPUMemory(PlatformDeviceId gpu_id) {
  se::StreamExecutor* se =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
@ -40,7 +40,7 @@ int64 GetTotalGPUMemory(PlatformGpuId gpu_id) {
  return total_memory;
 }

-Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major,
+Status GetComputeCapability(PlatformDeviceId gpu_id, int* cc_major,
                            int* cc_minor) {
  se::StreamExecutor* se =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
@ -350,7 +350,7 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
 // error.
 TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) {
  int cc_major, cc_minor;
-  TF_ASSERT_OK(GetComputeCapability(PlatformGpuId(0), &cc_major, &cc_minor));
+  TF_ASSERT_OK(GetComputeCapability(PlatformDeviceId(0), &cc_major, &cc_minor));
  // Exit early while running on Pascal or later GPUs.
  if (cc_major >= 6) {
    return;
@ -371,10 +371,10 @@ TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) {
 // more memory than what is available on the device.
 TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) {
  static constexpr double kGpuMemoryFraction = 1.2;
-  static constexpr PlatformGpuId kPlatformGpuId(0);
+  static constexpr PlatformDeviceId kPlatformDeviceId(0);

  int cc_major, cc_minor;
-  TF_ASSERT_OK(GetComputeCapability(kPlatformGpuId, &cc_major, &cc_minor));
+  TF_ASSERT_OK(GetComputeCapability(kPlatformDeviceId, &cc_major, &cc_minor));
  // Exit early if running on pre-Pascal GPUs.
  if (cc_major < 6) {
    LOG(INFO)
@ -389,8 +389,9 @@ TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) {
  ASSERT_EQ(1, devices.size());

  int64 memory_limit = devices[0]->attributes().memory_limit();
-  ASSERT_EQ(memory_limit, static_cast<int64>(GetTotalGPUMemory(kPlatformGpuId) *
-                                             kGpuMemoryFraction));
+  ASSERT_EQ(memory_limit,
+            static_cast<int64>(GetTotalGPUMemory(kPlatformDeviceId) *
+                               kGpuMemoryFraction));

  AllocatorAttributes allocator_attributes = AllocatorAttributes();
  allocator_attributes.set_gpu_compatible(true);
--- a/tensorflow/core/common_runtime/gpu/gpu_id.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id.h
@ -17,13 +17,6 @@ limitations under the License.

 #include "tensorflow/core/common_runtime/device/device_id.h"

-namespace tensorflow {
-
-// TODO(annarev): remove these aliases after all references are updated
-// to use device ids.
-typedef TfDeviceId TfGpuId;
-typedef PlatformDeviceId PlatformGpuId;
-
-}  // namespace tensorflow
+// TODO(sanjoy): Delete the header and forward the references.

 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_H_
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc
@ -20,16 +20,16 @@ limitations under the License.

 namespace tensorflow {

-Status GpuIdManager::InsertTfPlatformGpuIdPair(
-    TfDeviceId tf_gpu_id, PlatformDeviceId platform_gpu_id) {
-  return DeviceIdManager::InsertTfPlatformDeviceIdPair(DEVICE_GPU, tf_gpu_id,
-                                                       platform_gpu_id);
+Status GpuIdManager::InsertTfPlatformDeviceIdPair(
+    TfDeviceId tf_device_id, PlatformDeviceId platform_device_id) {
+  return DeviceIdManager::InsertTfPlatformDeviceIdPair(DEVICE_GPU, tf_device_id,
+                                                       platform_device_id);
 }

-Status GpuIdManager::TfToPlatformGpuId(TfDeviceId tf_gpu_id,
-                                       PlatformDeviceId* platform_gpu_id) {
-  return DeviceIdManager::TfToPlatformDeviceId(DEVICE_GPU, tf_gpu_id,
-                                               platform_gpu_id);
+Status GpuIdManager::TfToPlatformDeviceId(
+    TfDeviceId tf_device_id, PlatformDeviceId* platform_device_id) {
+  return DeviceIdManager::TfToPlatformDeviceId(DEVICE_GPU, tf_device_id,
+                                               platform_device_id);
 }

 void GpuIdManager::TestOnlyReset() { DeviceIdManager::TestOnlyReset(); }
--- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h
@ -21,17 +21,18 @@ limitations under the License.

 namespace tensorflow {

-// Class that maintains a map from TfGpuId to PlatformGpuId, and manages the
-// translation between them.
+// Class that maintains a map from TfDeviceId to PlatformDeviceId, and manages
+// the translation between them.
 class GpuIdManager {
 public:
-  // Adds a mapping from tf_gpu_id to platform_gpu_id.
-  static Status InsertTfPlatformGpuIdPair(TfDeviceId tf_gpu_id,
-                                          PlatformDeviceId platform_gpu_id);
+  // Adds a mapping from tf_device_id to platform_device_id.
+  static Status InsertTfPlatformDeviceIdPair(
+      TfDeviceId tf_device_id, PlatformDeviceId platform_device_id);

-  // Gets the platform_gpu_id associated with tf_gpu_id. Returns OK if found.
-  static Status TfToPlatformGpuId(TfDeviceId tf_gpu_id,
-                                  PlatformDeviceId* platform_gpu_id);
+  // Gets the platform_device_id associated with tf_device_id. Returns OK if
+  // found.
+  static Status TfToPlatformDeviceId(TfDeviceId tf_device_id,
+                                     PlatformDeviceId* platform_device_id);

  // Clears the map. Used in unit tests only.
  static void TestOnlyReset();
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@ -83,10 +83,10 @@ GPUProcessState::GPUProcessState() : gpu_device_enabled_(false) {
  process_state_ = ProcessState::singleton();
 }

-int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) {
+int GPUProcessState::BusIdForGPU(TfDeviceId tf_device_id) {
  // Return the NUMA node associated with the GPU's StreamExecutor.
  se::StreamExecutor* se = DeviceIdUtil::ExecutorForTfDeviceId(
-                               DEVICE_GPU, GPUMachineManager(), tf_gpu_id)
+                               DEVICE_GPU, GPUMachineManager(), tf_device_id)
                               .ValueOrDie();
  int numa_node = se->GetDeviceDescription().numa_node();
  // bus_id must be non-negative.  If the numa_node is not known,
@ -96,11 +96,11 @@ int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) {

 // NOLINTNEXTLINE: clang-tidy complains this is unused because of build flags.
 static SubAllocator* CreateSubAllocator(
-    const GPUOptions& options, PlatformGpuId platform_gpu_id,
+    const GPUOptions& options, PlatformDeviceId platform_device_id,
    const std::vector<SubAllocator::Visitor>& alloc_visitors,
-    size_t total_bytes, const std::vector<TfGpuId>& peer_gpu_ids) {
+    size_t total_bytes, const std::vector<TfDeviceId>& peer_gpu_ids) {
  auto executor = DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(),
-                                                            platform_gpu_id)
+                                                            platform_device_id)
                      .ValueOrDie();

  // FIXME(imintz): Observed OOM issues when using the virtual memory
@ -110,21 +110,21 @@ static SubAllocator* CreateSubAllocator(
  // TODO(imintz): Remove the cuMemAlloc capability of this allocator.
  if (options.per_process_gpu_memory_fraction() > 1.0 ||
      options.experimental().use_unified_memory()) {
-    return new DeviceMemAllocator(executor, platform_gpu_id,
+    return new DeviceMemAllocator(executor, platform_device_id,
                                  /*use_unified_memory=*/true, alloc_visitors,
                                  {});
  } else {
    auto* gpu_context = reinterpret_cast<stream_executor::gpu::GpuContext*>(
        executor->implementation()->GpuContextHack());

-    absl::flat_hash_set<PlatformGpuId> platform_peer_gpu_ids;
+    absl::flat_hash_set<PlatformDeviceId> platform_peer_gpu_ids;
    platform_peer_gpu_ids.reserve(peer_gpu_ids.size());
-    for (const TfGpuId tf_gpu_id : peer_gpu_ids) {
-      PlatformGpuId platform_gpu_id;
-      TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
-      platform_peer_gpu_ids.insert(platform_gpu_id);
+    for (const TfDeviceId tf_device_id : peer_gpu_ids) {
+      PlatformDeviceId platform_device_id;
+      TF_CHECK_OK(GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id));
+      platform_peer_gpu_ids.insert(platform_device_id);
    }
-    std::vector<PlatformGpuId> platform_peer_gpu_ids_vec(
+    std::vector<PlatformDeviceId> platform_peer_gpu_ids_vec(
        platform_peer_gpu_ids.begin(), platform_peer_gpu_ids.end());

    // Adjust virtual address space to be slightly larger than the physical
@ -133,7 +133,7 @@ static SubAllocator* CreateSubAllocator(
    // TODO(imintz): Update BFC allocator to ensure it doesn't create holes in
    // the va space.
    return GpuVirtualMemAllocator::Create(
-               alloc_visitors, {}, *gpu_context, platform_gpu_id,
+               alloc_visitors, {}, *gpu_context, platform_device_id,
               /*virtual_address_space_size=*/total_bytes * 2,
               platform_peer_gpu_ids_vec)
        .ValueOrDie()
@ -141,7 +141,7 @@ static SubAllocator* CreateSubAllocator(
  }
 #else
  return new DeviceMemAllocator(
-      executor, platform_gpu_id,
+      executor, platform_device_id,
      (options.per_process_gpu_memory_fraction() > 1.0 ||
       options.experimental().use_unified_memory()),
      alloc_visitors, {});
@ -149,21 +149,21 @@ static SubAllocator* CreateSubAllocator(
 }

 Allocator* GPUProcessState::GetGPUAllocator(
-    const GPUOptions& options, TfGpuId tf_gpu_id, size_t total_bytes,
-    const std::vector<TfGpuId>& peer_gpu_ids) {
+    const GPUOptions& options, TfDeviceId tf_device_id, size_t total_bytes,
+    const std::vector<TfDeviceId>& peer_gpu_ids) {
  CHECK(process_state_);
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
  const string& allocator_type = options.allocator_type();
  mutex_lock lock(mu_);
  DeviceIdUtil::CheckValidTfDeviceId(DEVICE_GPU, GPUMachineManager(),
-                                     tf_gpu_id);
+                                     tf_device_id);

-  if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
-    gpu_allocators_.resize(tf_gpu_id.value() + 1);
+  if (tf_device_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
+    gpu_allocators_.resize(tf_device_id.value() + 1);
  }

-  AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()];
+  AllocatorParts& allocator_parts = gpu_allocators_[tf_device_id.value()];
  if (allocator_parts.allocator == nullptr) {
    // Validate allocator types.
    if (!allocator_type.empty() && allocator_type != "BFC") {
@ -171,19 +171,20 @@ Allocator* GPUProcessState::GetGPUAllocator(
      return nullptr;
    }

-    PlatformGpuId platform_gpu_id;
-    TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
-    int bus_id = BusIdForGPU(tf_gpu_id);
+    PlatformDeviceId platform_device_id;
+    TF_CHECK_OK(
+        GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id));
+    int bus_id = BusIdForGPU(tf_device_id);
    DCHECK_GE(bus_id, 0);
    while (bus_id >= gpu_visitors_.size()) {
      gpu_visitors_.push_back({});
    }
    auto* sub_allocator =
-        CreateSubAllocator(options, platform_gpu_id, gpu_visitors_[bus_id],
+        CreateSubAllocator(options, platform_device_id, gpu_visitors_[bus_id],
                           total_bytes, peer_gpu_ids);
-    GPUBFCAllocator* gpu_bfc_allocator =
-        new GPUBFCAllocator(sub_allocator, total_bytes, options,
-                            strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc"));
+    GPUBFCAllocator* gpu_bfc_allocator = new GPUBFCAllocator(
+        sub_allocator, total_bytes, options,
+        strings::StrCat("GPU_", tf_device_id.value(), "_bfc"));
    Allocator* gpu_allocator = gpu_bfc_allocator;
    SharedCounter* timing_counter = nullptr;
    if (options.experimental().timestamped_allocator()) {
@ -195,29 +196,30 @@ Allocator* GPUProcessState::GetGPUAllocator(
    // distinctive patterns on both ends of allocated memory.
    if (UseCudaMemoryGuardAllocator()) {
      LOG(INFO) << "Using memory guard allocator for GPU.";
-      gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_gpu_id);
-      gpu_allocator = new GPUNanResetAllocator(gpu_allocator, platform_gpu_id);
+      gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_device_id);
+      gpu_allocator =
+          new GPUNanResetAllocator(gpu_allocator, platform_device_id);
    } else if (UseCudaMallocAllocator()) {
      LOG(INFO) << "Using CUDA malloc allocator for GPU.";
      // If true, passes all allocation requests through to cudaMalloc
      // useful for doing memory debugging with tools like cuda-memcheck
      // **WARNING** probably will not work in a multi-gpu scenario
      gpu_allocator =
-          new GPUcudaMallocAllocator(gpu_allocator, platform_gpu_id);
+          new GPUcudaMallocAllocator(gpu_allocator, platform_device_id);
    } else if (UseCudaMallocAsyncAllocator()) {
      LOG(INFO) << "Using CUDA malloc Async allocator for GPU.";
      // If true, passes all allocation requests through to cudaMallocAsync
      // TODO: useful for doing memory debugging with tools like cuda-memcheck
      // TODO: **WARNING** probably will not work in a multi-gpu scenario
      gpu_allocator =
-          new GpuCudaMallocAsyncAllocator(platform_gpu_id, total_bytes);
+          new GpuCudaMallocAsyncAllocator(platform_device_id, total_bytes);
    }

    Allocator* recording_allocator = nullptr;
    if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) {
      ProcessState::MemDesc md;
      md.loc = ProcessState::MemDesc::GPU;
-      md.dev_index = platform_gpu_id.value();
+      md.dev_index = platform_device_id.value();
      md.gpu_registered = false;
      md.nic_registered = true;
      recording_allocator = new internal::RecordingAllocator(
@ -240,20 +242,20 @@ Allocator* GPUProcessState::GetGPUAllocator(
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 }

-SharedCounter* GPUProcessState::GPUAllocatorCounter(TfGpuId tf_gpu_id) {
+SharedCounter* GPUProcessState::GPUAllocatorCounter(TfDeviceId tf_device_id) {
  DCHECK(process_state_);
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
  DeviceIdUtil::CheckValidTfDeviceId(DEVICE_GPU, GPUMachineManager(),
-                                     tf_gpu_id);
+                                     tf_device_id);
  mutex_lock l(mu_);
-  if (tf_gpu_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
-    LOG(ERROR) << "Asked for counter for GPU allocator " << tf_gpu_id.value()
+  if (tf_device_id.value() >= static_cast<int64>(gpu_allocators_.size())) {
+    LOG(ERROR) << "Asked for counter for GPU allocator " << tf_device_id.value()
               << " but only have " << gpu_allocators_.size();
    return nullptr;
  }

-  AllocatorParts& allocator_parts = gpu_allocators_[tf_gpu_id.value()];
+  AllocatorParts& allocator_parts = gpu_allocators_[tf_device_id.value()];
  if (allocator_parts.counter.get() == nullptr) {
    SharedCounter* timing_counter = new SharedCounter;
    allocator_parts.bfc_allocator->SetTimingCounter(timing_counter);
@ -303,7 +305,7 @@ Allocator* GPUProcessState::GetGpuHostAllocator(int numa_node) {
  for (int i = 0; i < static_cast<int>(gpu_allocators_.size()); ++i) {
    if (gpu_allocators_[i].allocator != nullptr) {
      se = DeviceIdUtil::ExecutorForTfDeviceId(DEVICE_GPU, GPUMachineManager(),
-                                               TfGpuId(i))
+                                               TfDeviceId(i))
               .ValueOrDie();
      break;
    }
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
@ -72,18 +72,18 @@ class GPUProcessState {
  //
  // 'total_bytes' is the total number of bytes that should be made
  // available to the allocator.  The first call to this function for
-  // a given tf_gpu_id creates the allocator, so only the total_bytes
+  // a given tf_device_id creates the allocator, so only the total_bytes
  // used on that first call is used.
  //
  // "Allocator type" describes the type of algorithm to use for the
  // underlying allocator.  REQUIRES: Must be a valid type (see
  // config.proto for the list of supported strings.).
  //
-  // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in the
-  // current system environment.  Otherwise returns nullptr.
-  virtual Allocator* GetGPUAllocator(const GPUOptions& options,
-                                     TfGpuId tf_gpu_id, size_t total_bytes,
-                                     const std::vector<TfGpuId>& peer_gpu_ids);
+  // REQUIRES: tf_device_id must be a valid id for a BaseGPUDevice available in
+  // the current system environment.  Otherwise returns nullptr.
+  virtual Allocator* GetGPUAllocator(
+      const GPUOptions& options, TfDeviceId tf_device_id, size_t total_bytes,
+      const std::vector<TfDeviceId>& peer_gpu_ids);

  int NumGPUAllocators() {
    mutex_lock l(mu_);
@ -115,9 +115,9 @@ class GPUProcessState {
                                     const SubAllocator::Visitor& visitor);

  // Returns bus_id for the given GPU id.
-  virtual int BusIdForGPU(TfGpuId tf_gpu_id);
+  virtual int BusIdForGPU(TfDeviceId tf_device_id);

-  SharedCounter* GPUAllocatorCounter(TfGpuId tf_gpu_id);
+  SharedCounter* GPUAllocatorCounter(TfDeviceId tf_device_id);

 protected:
  // GPUProcessState is a singleton that should not normally be deleted except
--- a/tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator.cc
@ -44,7 +44,7 @@ StatusOr<bool> SupportsVirtualAddressManagement(GpuDeviceHandle device) {
 }

 Status CheckVirtualAddressManagementSupport(GpuDeviceHandle device,
-                                            PlatformGpuId gpu_id) {
+                                            PlatformDeviceId gpu_id) {
  TF_ASSIGN_OR_RETURN(bool supports_virtual_address_management,
                      SupportsVirtualAddressManagement(device));
  if (!supports_virtual_address_management) {
@ -59,11 +59,11 @@ Status CheckVirtualAddressManagementSupport(GpuDeviceHandle device,

 /* static */ stream_executor::port::StatusOr<
    std::unique_ptr<GpuVirtualMemAllocator>>
-GpuVirtualMemAllocator::Create(const std::vector<Visitor>& alloc_visitors,
-                               const std::vector<Visitor>& free_visitors,
-                               GpuContext& gpu_context, PlatformGpuId gpu_id,
-                               size_t virtual_address_space_size,
-                               const std::vector<PlatformGpuId>& peer_gpu_ids) {
+GpuVirtualMemAllocator::Create(
+    const std::vector<Visitor>& alloc_visitors,
+    const std::vector<Visitor>& free_visitors, GpuContext& gpu_context,
+    PlatformDeviceId gpu_id, size_t virtual_address_space_size,
+    const std::vector<PlatformDeviceId>& peer_gpu_ids) {
  std::vector<GpuDeviceHandle> access_gpu_handles;
  access_gpu_handles.reserve(peer_gpu_ids.size() + 1);

@ -111,7 +111,8 @@ GpuVirtualMemAllocator::Create(const std::vector<Visitor>& alloc_visitors,
 GpuVirtualMemAllocator::GpuVirtualMemAllocator(
    const std::vector<Visitor>& alloc_visitors,
    const std::vector<Visitor>& free_visitors, GpuContext& gpu_context,
-    PlatformGpuId gpu_id, const std::vector<GpuDeviceHandle> access_gpu_handles,
+    PlatformDeviceId gpu_id,
+    const std::vector<GpuDeviceHandle> access_gpu_handles,
    GpuDriver::VmemSpan vmem, size_t granularity)
    : SubAllocator(alloc_visitors, free_visitors),
      gpu_context_(gpu_context),
--- a/tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator.h
@ -44,9 +44,9 @@ class GpuVirtualMemAllocator : public SubAllocator {
      std::unique_ptr<GpuVirtualMemAllocator>>
  Create(const std::vector<Visitor>& alloc_visitors,
         const std::vector<Visitor>& free_visitors,
-         stream_executor::gpu::GpuContext& gpu_context, PlatformGpuId gpu_id,
+         stream_executor::gpu::GpuContext& gpu_context, PlatformDeviceId gpu_id,
         size_t virtual_address_space_size,
-         const std::vector<PlatformGpuId>& peer_gpu_ids);
+         const std::vector<PlatformDeviceId>& peer_gpu_ids);
  ~GpuVirtualMemAllocator() override;

  // Allocates memory at least as large as requested by num_bytes. Will be
@ -74,12 +74,12 @@ class GpuVirtualMemAllocator : public SubAllocator {
  GpuVirtualMemAllocator(
      const std::vector<Visitor>& alloc_visitors,
      const std::vector<Visitor>& free_visitors,
-      stream_executor::gpu::GpuContext& gpu_context, PlatformGpuId gpu_id,
+      stream_executor::gpu::GpuContext& gpu_context, PlatformDeviceId gpu_id,
      std::vector<stream_executor::gpu::GpuDeviceHandle> access_device_handles,
      stream_executor::gpu::GpuDriver::VmemSpan vmem, size_t granularity);

  stream_executor::gpu::GpuContext& gpu_context_;
-  PlatformGpuId gpu_id_;
+  PlatformDeviceId gpu_id_;

  // Peer access is configured at mmap time so the allocator must be aware of
  // all gpus that may want to read the memory. This list also includes the
--- a/tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_virtual_mem_allocator_test.cc
@ -35,7 +35,7 @@ constexpr size_t k2MiB{2 << 20};

 // Creates an allocator with 8 MiB of virtual address space.
 std::unique_ptr<GpuVirtualMemAllocator> CreateAllocator() {
-  PlatformGpuId gpu_id(0);
+  PlatformDeviceId gpu_id(0);
  auto executor =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
@ -48,7 +48,7 @@ std::unique_ptr<GpuVirtualMemAllocator> CreateAllocator() {
 }

 TEST(GpuVirtualMemAllocatorTest, SimpleAlloc) {
-  PlatformGpuId gpu_id(0);
+  PlatformDeviceId gpu_id(0);
  auto executor =
      DeviceIdUtil::ExecutorForPlatformDeviceId(GPUMachineManager(), gpu_id)
          .ValueOrDie();
--- a/tensorflow/core/grappler/clusters/single_machine.cc
+++ b/tensorflow/core/grappler/clusters/single_machine.cc
@ -92,14 +92,15 @@ Status SingleMachine::Provision() {
        return errors::InvalidArgument(
            strings::StrCat("Not able to parse GPU device name: ", dev.name()));
      }
-      TfGpuId tf_gpu_id(parsed.id);
-      PlatformGpuId platform_gpu_id;
-      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
+      TfDeviceId tf_device_id(parsed.id);
+      PlatformDeviceId platform_device_id;
+      Status s =
+          GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id);
      if (!s.ok()) {
        return errors::Unavailable("Unknown TF GPU device with id ",
-                                   tf_gpu_id.value(), ": ", s.ToString());
+                                   tf_device_id.value(), ": ", s.ToString());
      }
-      attr = GetLocalGPUInfo(platform_gpu_id);
+      attr = GetLocalGPUInfo(platform_device_id);
    } else if (dev.device_type().find("XLA") == string::npos) {
      // Filter out the fake XLA devices to avoid double counting the actual
      // hardware resources that are available.
--- a/tensorflow/core/grappler/clusters/utils.cc
+++ b/tensorflow/core/grappler/clusters/utils.cc
@ -74,14 +74,14 @@ DeviceProperties GetLocalCPUInfo() {
  return device;
 }

-DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) {
+DeviceProperties GetLocalGPUInfo(PlatformDeviceId platform_device_id) {
  DeviceProperties device;
  device.set_type("GPU");

 #if GOOGLE_CUDA
  cudaDeviceProp properties;
  cudaError_t error =
-      cudaGetDeviceProperties(&properties, platform_gpu_id.value());
+      cudaGetDeviceProperties(&properties, platform_device_id.value());
  if (error != cudaSuccess) {
    device.set_type("UNKNOWN");
    LOG(ERROR) << "Failed to get device properties, error code: " << error;
@ -117,7 +117,7 @@ DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) {
 #elif TENSORFLOW_USE_ROCM
  hipDeviceProp_t properties;
  hipError_t error =
-      hipGetDeviceProperties(&properties, platform_gpu_id.value());
+      hipGetDeviceProperties(&properties, platform_device_id.value());
  if (error != hipSuccess) {
    device.set_type("UNKNOWN");
    LOG(ERROR) << "Failed to get device properties, error code: " << error;
@ -156,16 +156,17 @@ DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
    return GetLocalCPUInfo();
  } else if (device.type == "GPU") {
    if (device.has_id) {
-      TfGpuId tf_gpu_id(device.id);
-      PlatformGpuId platform_gpu_id;
-      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
+      TfDeviceId tf_device_id(device.id);
+      PlatformDeviceId platform_device_id;
+      Status s =
+          GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id);
      if (!s.ok()) {
        LOG(ERROR) << s;
        return unknown;
      }
-      return GetLocalGPUInfo(platform_gpu_id);
+      return GetLocalGPUInfo(platform_device_id);
    } else {
-      return GetLocalGPUInfo(PlatformGpuId(0));
+      return GetLocalGPUInfo(PlatformDeviceId(0));
    }
  }
  return unknown;
--- a/tensorflow/core/grappler/clusters/utils.h
+++ b/tensorflow/core/grappler/clusters/utils.h
@ -28,7 +28,7 @@ DeviceProperties GetLocalCPUInfo();

 // Returns the DeviceProperties for the specified GPU attached to the server on
 // which grappler is running.
-DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id);
+DeviceProperties GetLocalGPUInfo(PlatformDeviceId platform_device_id);

 // Returns the DeviceProperties of the specified device
 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device);
--- a/tensorflow/core/grappler/clusters/utils_test.cc
+++ b/tensorflow/core/grappler/clusters/utils_test.cc
@ -33,11 +33,11 @@ TEST(UtilsTest, GetLocalGPUInfo) {
  DeviceProperties properties;

  // Invalid platform GPU ID.
-  properties = GetLocalGPUInfo(PlatformGpuId(100));
+  properties = GetLocalGPUInfo(PlatformDeviceId(100));
  EXPECT_EQ("UNKNOWN", properties.type());

  // Succeed when a valid platform GPU id was inserted.
-  properties = GetLocalGPUInfo(PlatformGpuId(0));
+  properties = GetLocalGPUInfo(PlatformDeviceId(0));
  EXPECT_EQ("GPU", properties.type());
  EXPECT_EQ("NVIDIA", properties.vendor());
 #elif TENSORFLOW_USE_ROCM
@ -45,21 +45,21 @@ TEST(UtilsTest, GetLocalGPUInfo) {
  DeviceProperties properties;

  // Invalid platform GPU ID.
-  properties = GetLocalGPUInfo(PlatformGpuId(100));
+  properties = GetLocalGPUInfo(PlatformDeviceId(100));
  EXPECT_EQ("UNKNOWN", properties.type());

  // Succeed when a valid platform GPU id was inserted.
-  properties = GetLocalGPUInfo(PlatformGpuId(0));
+  properties = GetLocalGPUInfo(PlatformDeviceId(0));
  EXPECT_EQ("GPU", properties.type());
  EXPECT_EQ("Advanced Micro Devices, Inc", properties.vendor());
 #else
  LOG(INFO) << "CUDA is not enabled.";
  DeviceProperties properties;

-  properties = GetLocalGPUInfo(PlatformGpuId(0));
+  properties = GetLocalGPUInfo(PlatformDeviceId(0));
  EXPECT_EQ("GPU", properties.type());

-  properties = GetLocalGPUInfo(PlatformGpuId(100));
+  properties = GetLocalGPUInfo(PlatformDeviceId(100));
  EXPECT_EQ("GPU", properties.type());
 #endif
 }
@ -97,14 +97,14 @@ TEST(UtilsTest, GetDeviceInfo) {

 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
  // Invalid platform GPU id.
-  TF_ASSERT_OK(
-      GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100)));
+  TF_ASSERT_OK(GpuIdManager::InsertTfPlatformDeviceIdPair(
+      TfDeviceId(0), PlatformDeviceId(100)));
  properties = GetDeviceInfo(device);
  EXPECT_EQ("UNKNOWN", properties.type());

  // Valid platform GPU id.
-  TF_ASSERT_OK(
-      GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(1), PlatformGpuId(0)));
+  TF_ASSERT_OK(GpuIdManager::InsertTfPlatformDeviceIdPair(TfDeviceId(1),
+                                                          PlatformDeviceId(0)));
  device.id = 1;
  properties = GetDeviceInfo(device);
  EXPECT_EQ("GPU", properties.type());
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@ -241,14 +241,15 @@ DeviceProperties GetDeviceInfo(const string& device_str) {
  DeviceNameUtils::ParsedName parsed;
  if (DeviceNameUtils::ParseFullName(device_str, &parsed)) {
    if (parsed.type == "GPU") {
-      TfGpuId tf_gpu_id(parsed.id);
-      PlatformGpuId platform_gpu_id;
-      Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
+      TfDeviceId tf_device_id(parsed.id);
+      PlatformDeviceId platform_device_id;
+      Status s =
+          GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id);
      if (!s.ok()) {
        // We are probably running simulation without linking cuda libraries.
-        platform_gpu_id = PlatformGpuId(parsed.id);
+        platform_device_id = PlatformDeviceId(parsed.id);
      }
-      return GetLocalGPUInfo(platform_gpu_id);
+      return GetLocalGPUInfo(platform_device_id);
    } else if (parsed.type == "CPU") {
      return GetLocalCPUInfo();
    }