[ROCm] Fix for the ROCm CSB breakage - 200604
This PR addresses 2 unit test failures introduced by the following commit
bd20260350
failing unit tests
```
//tensorflow/core/common_runtime/gpu:gpu_device_test
//tensorflow/core/common_runtime/gpu:gpu_device_unified_memory_test
```
It is different from PR #40164, which fixes the build error on the ROCm platform.
The commit above adds unit tests that check the assignment of priority values to GPU streams. Because ROCm support for assigning priority values to GPU streams is missing, those unit tests fail.
This PR/commit adds the missing ROCm support, and updates the unit tests to work with AMD GPUs too. The valid priority value range appears to be different for AMD GPUs (0, 2) compared to NVidia GPUs (-1, 0), hence the corresponding changes required in the test cases as well.
This commit is contained in:
parent
1af42f028b
commit
856f899e4e
|
@ -847,7 +847,7 @@ Status VerifyVirtualDeviceSettings(
|
||||||
" #valid GPUs: ", valid_platform_gpu_ids.size(),
|
" #valid GPUs: ", valid_platform_gpu_ids.size(),
|
||||||
" virtual_devices.size(): ", virtual_devices.size());
|
" virtual_devices.size(): ", virtual_devices.size());
|
||||||
}
|
}
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
|
||||||
// Check memory_limt_mb and priority sizes match if priority is non-empty.
|
// Check memory_limt_mb and priority sizes match if priority is non-empty.
|
||||||
bool priority_exists = !virtual_devices.Get(0).priority().empty();
|
bool priority_exists = !virtual_devices.Get(0).priority().empty();
|
||||||
for (int i = 0; i < virtual_devices.size(); ++i) {
|
for (int i = 0; i < virtual_devices.size(); ++i) {
|
||||||
|
@ -893,15 +893,6 @@ Status VerifyVirtualDeviceSettings(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#elif TENSORFLOW_USE_ROCM
|
|
||||||
for (int i = 0; i < virtual_devices.size(); ++i) {
|
|
||||||
if (!virtual_devices.Get(i).priority().empty()) {
|
|
||||||
return errors::InvalidArgument(
|
|
||||||
"Priority is supported only on Nvidia GPUs."
|
|
||||||
" However, priority is set for virtual device ",
|
|
||||||
i, ", which corresponds to a non Nvidia GPU");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
|
@ -1185,6 +1176,18 @@ Status BaseGPUDeviceFactory::CreateDevices(
|
||||||
platform_gpu_id.value(),
|
platform_gpu_id.value(),
|
||||||
" failed. Status: ", hipGetErrorString(err));
|
" failed. Status: ", hipGetErrorString(err));
|
||||||
}
|
}
|
||||||
|
int priority_low, priority_high;
|
||||||
|
hipDeviceGetStreamPriorityRange(&priority_low, &priority_high);
|
||||||
|
if (err != hipSuccess) {
|
||||||
|
return errors::Internal(
|
||||||
|
"hipDeviceGetStreamPriorityRange() on GPU:", original_device,
|
||||||
|
" failed. Status: ", hipGetErrorString(err));
|
||||||
|
}
|
||||||
|
VLOG(1) << "HIP stream priority range on GPU(" << original_device
|
||||||
|
<< "): " << priority_high << "," << priority_low;
|
||||||
|
supported_priority_ranges.insert(
|
||||||
|
std::make_pair(platform_gpu_id.value(),
|
||||||
|
std::make_pair(priority_low, priority_high)));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
// Reset to the original device.
|
// Reset to the original device.
|
||||||
|
|
|
@ -229,52 +229,89 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
|
||||||
|
|
||||||
TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
|
TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
|
||||||
{
|
{
|
||||||
// Priority outside the range (-1, 0).
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
// Priority outside the range (0, 2) for AMD GPUs
|
||||||
|
SessionOptions opts =
|
||||||
|
MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
|
||||||
|
#else
|
||||||
|
// Priority outside the range (-1, 0) for NVidia GPUs
|
||||||
SessionOptions opts =
|
SessionOptions opts =
|
||||||
MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 0}});
|
MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 0}});
|
||||||
|
#endif
|
||||||
std::vector<std::unique_ptr<Device>> devices;
|
std::vector<std::unique_ptr<Device>> devices;
|
||||||
Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
|
Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
|
||||||
opts, kDeviceNamePrefix, &devices);
|
opts, kDeviceNamePrefix, &devices);
|
||||||
EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
|
EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
|
||||||
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
ExpectErrorMessageSubstr(
|
||||||
|
status,
|
||||||
|
"Priority -1 is outside the range of supported priorities [0,2] for"
|
||||||
|
" virtual device 0 on GPU# 0");
|
||||||
|
#else
|
||||||
ExpectErrorMessageSubstr(
|
ExpectErrorMessageSubstr(
|
||||||
status,
|
status,
|
||||||
"Priority -2 is outside the range of supported priorities [-1,0] for"
|
"Priority -2 is outside the range of supported priorities [-1,0] for"
|
||||||
" virtual device 0 on GPU# 0");
|
" virtual device 0 on GPU# 0");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// Priority outside the range (-1, 0).
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
// Priority outside the range (0, 2) for AMD GPUs
|
||||||
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
|
||||||
|
#else
|
||||||
|
// Priority outside the range (-1, 0) for NVidia GPUs
|
||||||
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
|
||||||
|
#endif
|
||||||
std::vector<std::unique_ptr<Device>> devices;
|
std::vector<std::unique_ptr<Device>> devices;
|
||||||
Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
|
Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
|
||||||
opts, kDeviceNamePrefix, &devices);
|
opts, kDeviceNamePrefix, &devices);
|
||||||
EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
|
EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
|
||||||
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
ExpectErrorMessageSubstr(
|
||||||
|
status,
|
||||||
|
"Priority 3 is outside the range of supported priorities [0,2] for"
|
||||||
|
" virtual device 0 on GPU# 0");
|
||||||
|
#else
|
||||||
ExpectErrorMessageSubstr(
|
ExpectErrorMessageSubstr(
|
||||||
status,
|
status,
|
||||||
"Priority 1 is outside the range of supported priorities [-1,0] for"
|
"Priority 1 is outside the range of supported priorities [-1,0] for"
|
||||||
" virtual device 0 on GPU# 0");
|
" virtual device 0 on GPU# 0");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
|
TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
|
||||||
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}}, {{-1}});
|
// 0 is a valid priority value for both AMD and NVidia GPUs
|
||||||
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}}, {{0}});
|
||||||
std::vector<std::unique_ptr<Device>> devices;
|
std::vector<std::unique_ptr<Device>> devices;
|
||||||
TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
|
TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
|
||||||
opts, kDeviceNamePrefix, &devices));
|
opts, kDeviceNamePrefix, &devices));
|
||||||
EXPECT_EQ(1, devices.size());
|
EXPECT_EQ(1, devices.size());
|
||||||
EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
|
EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
|
||||||
EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
|
TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
|
||||||
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
// Valid range for priority values on AMD GPUs in (0,2)
|
||||||
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
|
||||||
|
#else
|
||||||
|
// Valid range for priority values on NVidia GPUs in (-1, 0)
|
||||||
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
|
||||||
|
#endif
|
||||||
std::vector<std::unique_ptr<Device>> devices;
|
std::vector<std::unique_ptr<Device>> devices;
|
||||||
TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
|
TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
|
||||||
opts, kDeviceNamePrefix, &devices));
|
opts, kDeviceNamePrefix, &devices));
|
||||||
EXPECT_EQ(2, devices.size());
|
EXPECT_EQ(2, devices.size());
|
||||||
EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
|
EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
|
||||||
EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
|
EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
|
||||||
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
||||||
|
EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
|
||||||
|
#else
|
||||||
EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
||||||
EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
|
EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
|
||||||
|
#endif
|
||||||
ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
|
ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
|
||||||
ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
|
ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
|
||||||
EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
|
EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
|
||||||
|
@ -292,7 +329,8 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
|
||||||
TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
|
TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
|
||||||
{
|
{
|
||||||
// Multile virtual devices with fewer priorities.
|
// Multile virtual devices with fewer priorities.
|
||||||
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1}});
|
// 0 is a valid priority value for both AMD and NVidia GPUs
|
||||||
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0}});
|
||||||
std::vector<std::unique_ptr<Device>> devices;
|
std::vector<std::unique_ptr<Device>> devices;
|
||||||
Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
|
Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
|
||||||
opts, kDeviceNamePrefix, &devices);
|
opts, kDeviceNamePrefix, &devices);
|
||||||
|
@ -305,16 +343,27 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// Multile virtual devices with matching priority.
|
// Multile virtual devices with matching priority.
|
||||||
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
// Valid range for priority values on AMD GPUs in (0,2)
|
||||||
|
SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
|
||||||
|
#else
|
||||||
|
// Valid range for priority values on NVidia GPUs in (-1, 0)
|
||||||
SessionOptions opts =
|
SessionOptions opts =
|
||||||
MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
|
MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
|
||||||
|
#endif
|
||||||
std::vector<std::unique_ptr<Device>> devices;
|
std::vector<std::unique_ptr<Device>> devices;
|
||||||
TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
|
TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
|
||||||
opts, kDeviceNamePrefix, &devices));
|
opts, kDeviceNamePrefix, &devices));
|
||||||
EXPECT_EQ(2, devices.size());
|
EXPECT_EQ(2, devices.size());
|
||||||
EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
|
EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
|
||||||
EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
|
EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
|
||||||
|
#if TENSORFLOW_USE_ROCM
|
||||||
|
EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
||||||
|
EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
|
||||||
|
#else
|
||||||
EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
|
||||||
EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
|
EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -560,14 +560,15 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
|
||||||
/* static */ bool GpuDriver::CreateStream(GpuContext* context,
|
/* static */ bool GpuDriver::CreateStream(GpuContext* context,
|
||||||
GpuStreamHandle* stream,
|
GpuStreamHandle* stream,
|
||||||
int priority) {
|
int priority) {
|
||||||
if (priority != 0) {
|
|
||||||
LOG(ERROR) << "ROCM stream doesn't support priority. "
|
|
||||||
<< " Should be set to 0 but given: " << priority;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
ScopedActivateContext activated{context};
|
ScopedActivateContext activated{context};
|
||||||
hipError_t res = tensorflow::wrap::hipStreamCreateWithFlags(
|
hipError_t res;
|
||||||
stream, hipStreamDefault); // switch to hipStreamNonBlocking?
|
if (priority == 0) {
|
||||||
|
res = tensorflow::wrap::hipStreamCreateWithFlags(
|
||||||
|
stream, hipStreamDefault); // switch to hipStreamNonBlocking?
|
||||||
|
} else {
|
||||||
|
res = tensorflow::wrap::hipStreamCreateWithPriority(
|
||||||
|
stream, hipStreamDefault, priority); // switch to hipStreamNonBlocking?
|
||||||
|
}
|
||||||
if (res != hipSuccess) {
|
if (res != hipSuccess) {
|
||||||
LOG(ERROR) << "could not allocate ROCM stream for device "
|
LOG(ERROR) << "could not allocate ROCM stream for device "
|
||||||
<< context->device_ordinal() << ": " << ToString(res);
|
<< context->device_ordinal() << ": " << ToString(res);
|
||||||
|
|
|
@ -117,6 +117,7 @@ namespace wrap {
|
||||||
__macro(hipSetDevice) \
|
__macro(hipSetDevice) \
|
||||||
__macro(hipStreamAddCallback) \
|
__macro(hipStreamAddCallback) \
|
||||||
__macro(hipStreamCreateWithFlags) \
|
__macro(hipStreamCreateWithFlags) \
|
||||||
|
__macro(hipStreamCreateWithPriority) \
|
||||||
__macro(hipStreamDestroy) \
|
__macro(hipStreamDestroy) \
|
||||||
__macro(hipStreamQuery) \
|
__macro(hipStreamQuery) \
|
||||||
__macro(hipStreamSynchronize) \
|
__macro(hipStreamSynchronize) \
|
||||||
|
|
Loading…
Reference in New Issue