From d236afda36626f3dd6dfea234413b3b4d62fc9a0 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Tue, 22 Dec 2020 16:17:25 +0000 Subject: [PATCH] Adding hooks in the Stream Executor API to get/set the AMDGPU gcnArchName device property --- tensorflow/core/common_runtime/gpu/gpu_device.cc | 8 ++------ tensorflow/stream_executor/cuda/cuda_driver.cc | 7 +++++++ tensorflow/stream_executor/device_description.cc | 3 +++ tensorflow/stream_executor/device_description.h | 14 ++++++++++++++ tensorflow/stream_executor/gpu/gpu_driver.h | 6 ++++++ tensorflow/stream_executor/rocm/rocm_driver.cc | 15 +++++++++++++++ .../stream_executor/rocm/rocm_gpu_executor.cc | 10 +++++++++- tensorflow/stream_executor/tpu/c_api_decl.h | 1 + 8 files changed, 57 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 50647b8cd76..1fc99c5a520 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -1770,15 +1770,11 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds( << strings::HumanReadableNumBytes(description->memory_bandwidth()) << "/s"; #elif TENSORFLOW_USE_ROCM - int isa_version; - if (!description->rocm_amdgpu_isa_version(&isa_version)) { - // Logs internally on failure. 
- isa_version = 0; - } + std::string gcn_arch_name = description->rocm_amdgpu_gcn_arch_name(); LOG(INFO) << "Found device " << i << " with properties: " << "\npciBusID: " << description->pci_bus_id() << " name: " << description->name() - << " ROCm AMD GPU ISA: gfx" << isa_version + << " ROCm AMDGPU Arch: " << gcn_arch_name << "\ncoreClock: " << description->clock_rate_ghz() << "GHz" << " coreCount: " << description->core_count() << " deviceMemorySize: " diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index 42db563a0bd..2b64db17197 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -1388,6 +1388,13 @@ GpuDriver::CreateMemoryHandle(GpuContext* context, uint64 bytes) { "Feature not supported on CUDA platform (GetGpuISAVersion)"}; } +/* static */ port::Status GpuDriver::GetGpuGCNArchName( + CUdevice device, std::string* gcnArchName) { + return port::Status{ + port::error::INTERNAL, + "Feature not supported on CUDA platform (GetGpuGCNArchName)"}; +} + // Helper function that turns the integer output of cuDeviceGetAttribute to type // T and wraps it in a StatusOr. 
template diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index 130e2e638e5..4ffe02e7d96 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -51,6 +51,7 @@ DeviceDescription::DeviceDescription() cuda_compute_capability_major_(-1), cuda_compute_capability_minor_(-1), rocm_amdgpu_isa_version_(-1), + rocm_amdgpu_gcn_arch_name_(kUndefinedString), numa_node_(-1), core_count_(-1), ecc_enabled_(false) {} @@ -95,6 +96,8 @@ std::unique_ptr> DeviceDescription::ToMap() result["CUDA Compute Capability"] = absl::StrCat( cuda_compute_capability_major_, ".", cuda_compute_capability_minor_); + result["AMDGPU GCN Arch Name"] = absl::StrCat(rocm_amdgpu_gcn_arch_name_); + result["NUMA Node"] = absl::StrCat(numa_node()); result["Core Count"] = absl::StrCat(core_count()); result["ECC Enabled"] = absl::StrCat(ecc_enabled()); diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index fa7426eb04b..fef4be435ad 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -138,6 +138,13 @@ class DeviceDescription { // and the return value will be false. bool rocm_amdgpu_isa_version(int *version) const; + // Returns the + // * AMDGPU GCN Architecture Name if we're running on the ROCm platform. + // * kUndefinedString otherwise + const std::string rocm_amdgpu_gcn_arch_name() const { + return rocm_amdgpu_gcn_arch_name_; + } + // Returns the maximum amount of shared memory present on a single core // (i.e. Streaming Multiprocessor on NVIDIA GPUs; Compute Unit for OpenCL // devices). Note that some devices, such as NVIDIA's have a configurable @@ -203,6 +210,9 @@ class DeviceDescription { // ROCM AMDGPU ISA version, 0 if not available. int rocm_amdgpu_isa_version_; + // ROCm AMDGPU GCN Architecture name, kUndefinedString if not available. 
+ std::string rocm_amdgpu_gcn_arch_name_; + int numa_node_; int core_count_; bool ecc_enabled_; @@ -294,6 +304,10 @@ class DeviceDescriptionBuilder { device_description_->rocm_amdgpu_isa_version_ = version; } + void set_rocm_amdgpu_gcn_arch_name(const std::string& gcn_arch_name) { + device_description_->rocm_amdgpu_gcn_arch_name_ = gcn_arch_name; + } + void set_numa_node(int value) { device_description_->numa_node_ = value; } void set_core_count(int value) { device_description_->core_count_ = value; } void set_ecc_enabled(bool value) { diff --git a/tensorflow/stream_executor/gpu/gpu_driver.h b/tensorflow/stream_executor/gpu/gpu_driver.h index 3cd13dcc013..955ed59926a 100644 --- a/tensorflow/stream_executor/gpu/gpu_driver.h +++ b/tensorflow/stream_executor/gpu/gpu_driver.h @@ -460,6 +460,12 @@ class GpuDriver { // (supported on ROCm only) static port::Status GetGpuISAVersion(int* version, GpuDeviceHandle device); + // Return the full GCN Architecture Name for the device + // e.g. amdgcn-amd-amdhsa--gfx908:sramecc+:xnack- + // (supported on ROCm only) + static port::Status GetGpuGCNArchName(GpuDeviceHandle device, + std::string* gcnArchName); + // Returns the number of multiprocessors on the device (note that the device // may be multi-GPU-per-board). 
static port::StatusOr GetMultiprocessorCount(GpuDeviceHandle device); diff --git a/tensorflow/stream_executor/rocm/rocm_driver.cc b/tensorflow/stream_executor/rocm/rocm_driver.cc index a070979e71d..f7be297c3d7 100644 --- a/tensorflow/stream_executor/rocm/rocm_driver.cc +++ b/tensorflow/stream_executor/rocm/rocm_driver.cc @@ -1080,6 +1080,21 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { device)}; } +/* static */ port::Status GpuDriver::GetGpuGCNArchName( + hipDevice_t device, std::string* gcnArchName) { + hipDeviceProp_t props; + hipError_t result = tensorflow::wrap::hipGetDeviceProperties(&props, device); + if (result == hipSuccess) { + *gcnArchName = props.gcnArchName; + return port::Status::OK(); + } + *gcnArchName = ""; + return port::Status{ + port::error::INTERNAL, + absl::StrFormat("failed to determine AMDGpu GCN Arch Name for device %d", + device)}; +} + // Helper function that turns the integer output of hipDeviceGetAttribute to // type T and wraps it in a StatusOr. template diff --git a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc index dbab0304d82..3926aeeb288 100644 --- a/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc +++ b/tensorflow/stream_executor/rocm/rocm_gpu_executor.cc @@ -820,6 +820,12 @@ GpuExecutor::CreateDeviceDescription(int device_ordinal) { return status; } + string gcn_arch_name; + status = GpuDriver::GetGpuGCNArchName(device, &gcn_arch_name); + if (!status.ok()) { + return status; + } + internal::DeviceDescriptionBuilder builder; { @@ -888,7 +894,7 @@ GpuExecutor::CreateDeviceDescription(int device_ordinal) { } builder.set_platform_version( - absl::StrCat("AMDGPU ISA version: gfx", version)); + absl::StrCat("AMDGPU ISA version: ", gcn_arch_name)); // TODO(leary) should be a way to query this from the driver, but this is // unlikely to change for us any time soon. 
@@ -896,6 +902,8 @@ GpuExecutor::CreateDeviceDescription(int device_ordinal) { builder.set_device_vendor("Advanced Micro Devices, Inc"); builder.set_rocm_amdgpu_isa_version(version); + builder.set_rocm_amdgpu_gcn_arch_name(gcn_arch_name); + builder.set_shared_memory_per_core( GpuDriver::GetMaxSharedMemoryPerCore(device).ValueOrDie()); builder.set_shared_memory_per_block( diff --git a/tensorflow/stream_executor/tpu/c_api_decl.h b/tensorflow/stream_executor/tpu/c_api_decl.h index 71a725f5886..95af7303e77 100644 --- a/tensorflow/stream_executor/tpu/c_api_decl.h +++ b/tensorflow/stream_executor/tpu/c_api_decl.h @@ -140,6 +140,7 @@ typedef struct SE_DeviceDescription { int cuda_compute_capability_minor; int rocm_amdgpu_isa_version; + char* rocm_amdgpu_gcn_arch_name; int numa_node; int core_count;