Adding hooks in the Stream Executor API to get/set the AMDGPU gcnArchName device property
parent 9a7c57d1d8
commit d236afda36
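As a rough illustration of how the new property might be consumed once this change is in place (a minimal sketch, not part of the commit: the header path and the LogGcnArchName helper are assumptions for illustration only), a caller holding a DeviceDescription could read the arch name like any other field:

// Minimal usage sketch. Only rocm_amdgpu_gcn_arch_name() comes from this
// commit; everything else here is illustrative.
#include <iostream>
#include <string>

#include "tensorflow/stream_executor/device_description.h"  // assumed header path

// Hypothetical helper: logs the GCN arch string carried by a DeviceDescription.
void LogGcnArchName(const stream_executor::DeviceDescription& desc) {
  // On ROCm this is the full arch string, e.g.
  // "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-"; on other platforms it stays
  // at the undefined placeholder set by the default constructor.
  const std::string gcn_arch_name = desc.rocm_amdgpu_gcn_arch_name();
  std::cout << "AMDGPU GCN Arch Name: " << gcn_arch_name << std::endl;
}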
@@ -1770,15 +1770,11 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
               << strings::HumanReadableNumBytes(description->memory_bandwidth())
               << "/s";
 #elif TENSORFLOW_USE_ROCM
-    int isa_version;
-    if (!description->rocm_amdgpu_isa_version(&isa_version)) {
-      // Logs internally on failure.
-      isa_version = 0;
-    }
+    std::string gcn_arch_name = description->rocm_amdgpu_gcn_arch_name();
     LOG(INFO) << "Found device " << i << " with properties: "
               << "\npciBusID: " << description->pci_bus_id()
               << " name: " << description->name()
-              << " ROCm AMD GPU ISA: gfx" << isa_version
+              << " ROCm AMDGPU Arch: " << gcn_arch_name
               << "\ncoreClock: " << description->clock_rate_ghz() << "GHz"
               << " coreCount: " << description->core_count()
               << " deviceMemorySize: "
@@ -1388,6 +1388,13 @@ GpuDriver::CreateMemoryHandle(GpuContext* context, uint64 bytes) {
       "Feature not supported on CUDA platform (GetGpuISAVersion)"};
 }
 
+/* static */ port::Status GpuDriver::GetGpuGCNArchName(
+    CUdevice device, std::string* gcnArchName) {
+  return port::Status{
+      port::error::INTERNAL,
+      "Feature not supported on CUDA platform (GetGpuGCNArchName)"};
+}
+
 // Helper function that turns the integer output of cuDeviceGetAttribute to type
 // T and wraps it in a StatusOr.
 template <typename T>
@@ -51,6 +51,7 @@ DeviceDescription::DeviceDescription()
       cuda_compute_capability_major_(-1),
       cuda_compute_capability_minor_(-1),
       rocm_amdgpu_isa_version_(-1),
+      rocm_amdgpu_gcn_arch_name_(kUndefinedString),
       numa_node_(-1),
       core_count_(-1),
       ecc_enabled_(false) {}
@@ -95,6 +96,8 @@ std::unique_ptr<std::map<std::string, std::string>> DeviceDescription::ToMap()
   result["CUDA Compute Capability"] = absl::StrCat(
       cuda_compute_capability_major_, ".", cuda_compute_capability_minor_);
 
+  result["AMDGPU GCN Arch Name"] = absl::StrCat(rocm_amdgpu_gcn_arch_name_);
+
   result["NUMA Node"] = absl::StrCat(numa_node());
   result["Core Count"] = absl::StrCat(core_count());
   result["ECC Enabled"] = absl::StrCat(ecc_enabled());
@@ -138,6 +138,13 @@ class DeviceDescription {
   // and the return value will be false.
   bool rocm_amdgpu_isa_version(int *version) const;
 
+  // Returns the
+  // * AMDGPU GCN Architecture Name if we're running on the ROCm platform.
+  // * kUndefinedString otherwise
+  const std::string rocm_amdgpu_gcn_arch_name() const {
+    return rocm_amdgpu_gcn_arch_name_;
+  }
+
   // Returns the maximum amount of shared memory present on a single core
   // (i.e. Streaming Multiprocessor on NVIDIA GPUs; Compute Unit for OpenCL
   // devices). Note that some devices, such as NVIDIA's have a configurable
@@ -203,6 +210,9 @@ class DeviceDescription {
   // ROCM AMDGPU ISA version, 0 if not available.
   int rocm_amdgpu_isa_version_;
 
+  // ROCm AMDGPU GCN Architecture name, "" if not available.
+  std::string rocm_amdgpu_gcn_arch_name_;
+
   int numa_node_;
   int core_count_;
   bool ecc_enabled_;
@@ -294,6 +304,10 @@ class DeviceDescriptionBuilder {
     device_description_->rocm_amdgpu_isa_version_ = version;
   }
 
+  void set_rocm_amdgpu_gcn_arch_name(const std::string& gcn_arch_name) {
+    device_description_->rocm_amdgpu_gcn_arch_name_ = gcn_arch_name;
+  }
+
   void set_numa_node(int value) { device_description_->numa_node_ = value; }
   void set_core_count(int value) { device_description_->core_count_ = value; }
   void set_ecc_enabled(bool value) {
@@ -460,6 +460,12 @@ class GpuDriver {
   // (supported on ROCm only)
   static port::Status GetGpuISAVersion(int* version, GpuDeviceHandle device);
 
+  // Return the full GCN Architecture Name for the device,
+  // e.g. amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-
+  // (supported on ROCm only)
+  static port::Status GetGpuGCNArchName(GpuDeviceHandle device,
+                                        std::string* gcnArchName);
+
   // Returns the number of multiprocessors on the device (note that the device
   // may be multi-GPU-per-board).
   static port::StatusOr<int> GetMultiprocessorCount(GpuDeviceHandle device);
@@ -1080,6 +1080,21 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
                        device)};
 }
 
+/* static */ port::Status GpuDriver::GetGpuGCNArchName(
+    hipDevice_t device, std::string* gcnArchName) {
+  hipDeviceProp_t props;
+  hipError_t result = tensorflow::wrap::hipGetDeviceProperties(&props, device);
+  if (result == hipSuccess) {
+    *gcnArchName = props.gcnArchName;
+    return port::Status::OK();
+  }
+  *gcnArchName = "";
+  return port::Status{
+      port::error::INTERNAL,
+      absl::StrFormat("failed to determine AMDGpu GCN Arch Name for device %d",
+                      device)};
+}
+
 // Helper function that turns the integer output of hipDeviceGetAttribute to
 // type T and wraps it in a StatusOr.
 template <typename T>
@@ -820,6 +820,12 @@ GpuExecutor::CreateDeviceDescription(int device_ordinal) {
     return status;
   }
 
+  string gcn_arch_name;
+  status = GpuDriver::GetGpuGCNArchName(device, &gcn_arch_name);
+  if (!status.ok()) {
+    return status;
+  }
+
   internal::DeviceDescriptionBuilder builder;
 
   {
@@ -888,7 +894,7 @@ GpuExecutor::CreateDeviceDescription(int device_ordinal) {
   }
 
   builder.set_platform_version(
-      absl::StrCat("AMDGPU ISA version: gfx", version));
+      absl::StrCat("AMDGPU ISA version: ", gcn_arch_name));
 
   // TODO(leary) should be a way to query this from the driver, but this is
   // unlikely to change for us any time soon.
@@ -896,6 +902,8 @@ GpuExecutor::CreateDeviceDescription(int device_ordinal) {
 
   builder.set_device_vendor("Advanced Micro Devices, Inc");
   builder.set_rocm_amdgpu_isa_version(version);
+  builder.set_rocm_amdgpu_gcn_arch_name(gcn_arch_name);
+
   builder.set_shared_memory_per_core(
       GpuDriver::GetMaxSharedMemoryPerCore(device).ValueOrDie());
   builder.set_shared_memory_per_block(
@@ -140,6 +140,7 @@ typedef struct SE_DeviceDescription {
   int cuda_compute_capability_minor;
 
   int rocm_amdgpu_isa_version;
+  char* rocm_amdgpu_gcn_arch_name;
 
   int numa_node;
   int core_count;