diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index e47f56b2624..04b7f9d6082 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -847,7 +847,7 @@ Status VerifyVirtualDeviceSettings(
         " #valid GPUs: ", valid_platform_gpu_ids.size(),
         " virtual_devices.size(): ", virtual_devices.size());
   }
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   // Check memory_limit_mb and priority sizes match if priority is non-empty.
   bool priority_exists = !virtual_devices.Get(0).priority().empty();
   for (int i = 0; i < virtual_devices.size(); ++i) {
@@ -893,15 +893,6 @@ Status VerifyVirtualDeviceSettings(
       }
     }
   }
-#elif TENSORFLOW_USE_ROCM
-  for (int i = 0; i < virtual_devices.size(); ++i) {
-    if (!virtual_devices.Get(i).priority().empty()) {
-      return errors::InvalidArgument(
-          "Priority is supported only on Nvidia GPUs."
-          " However, priority is set for virtual device ",
-          i, ", which corresponds to a non Nvidia GPU");
-    }
-  }
 #endif
 
   return Status::OK();
@@ -1185,6 +1176,18 @@ Status BaseGPUDeviceFactory::CreateDevices(
             platform_gpu_id.value(),
             " failed. Status: ", hipGetErrorString(err));
       }
+      int priority_low, priority_high;
+      err = hipDeviceGetStreamPriorityRange(&priority_low, &priority_high);
+      if (err != hipSuccess) {
+        return errors::Internal(
+            "hipDeviceGetStreamPriorityRange() on GPU:", original_device,
+            " failed. Status: ", hipGetErrorString(err));
+      }
+      VLOG(1) << "HIP stream priority range on GPU(" << original_device
+              << "): " << priority_high << "," << priority_low;
+      supported_priority_ranges.insert(
+          std::make_pair(platform_gpu_id.value(),
+                         std::make_pair(priority_low, priority_high)));
 #endif
     }
     // Reset to the original device.
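
The gpu_device.cc hunks above replace the old ROCm-only rejection of priorities with a query of each GPU's supported stream priority range, recorded in supported_priority_ranges so that VerifyVirtualDeviceSettings can validate requested priorities on both CUDA and ROCm builds. As a rough standalone illustration of that query (a minimal sketch assuming a working ROCm install and hipcc; not code from this patch):

    #include <hip/hip_runtime.h>

    #include <cstdio>

    int main() {
      int priority_low = 0, priority_high = 0;
      hipError_t err =
          hipDeviceGetStreamPriorityRange(&priority_low, &priority_high);
      if (err != hipSuccess) {
        std::fprintf(stderr, "hipDeviceGetStreamPriorityRange failed: %s\n",
                     hipGetErrorString(err));
        return 1;
      }
      // On current AMD GPUs this typically reports [0, 2], the range the
      // unit tests below expect.
      std::printf("supported stream priorities: [%d, %d]\n", priority_low,
                  priority_high);
      return 0;
    }
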
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
index 26312d35af6..1703d926f9f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc
@@ -229,52 +229,89 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) {
 
 TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) {
   {
-    // Priority outside the range [-1, 0].
+#if TENSORFLOW_USE_ROCM
+    // Priority outside the range [0, 2] for AMD GPUs.
+    SessionOptions opts =
+        MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}});
+#else
+    // Priority outside the range [-1, 0] for NVidia GPUs.
     SessionOptions opts =
         MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 0}});
+#endif
     std::vector<std::unique_ptr<Device>> devices;
     Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
         opts, kDeviceNamePrefix, &devices);
     EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+#if TENSORFLOW_USE_ROCM
+    ExpectErrorMessageSubstr(
+        status,
+        "Priority -1 is outside the range of supported priorities [0,2] for"
+        " virtual device 0 on GPU# 0");
+#else
     ExpectErrorMessageSubstr(
         status,
         "Priority -2 is outside the range of supported priorities [-1,0] for"
         " virtual device 0 on GPU# 0");
+#endif
   }
   {
-    // Priority outside the range [-1, 0].
+#if TENSORFLOW_USE_ROCM
+    // Priority outside the range [0, 2] for AMD GPUs.
+    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}});
+#else
+    // Priority outside the range [-1, 0] for NVidia GPUs.
     SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
+#endif
     std::vector<std::unique_ptr<Device>> devices;
     Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
         opts, kDeviceNamePrefix, &devices);
     EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+#if TENSORFLOW_USE_ROCM
+    ExpectErrorMessageSubstr(
+        status,
+        "Priority 3 is outside the range of supported priorities [0,2] for"
+        " virtual device 0 on GPU# 0");
+#else
     ExpectErrorMessageSubstr(
         status,
         "Priority 1 is outside the range of supported priorities [-1,0] for"
         " virtual device 0 on GPU# 0");
+#endif
   }
 }
 
 TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) {
-  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}}, {{-1}});
+  // 0 is a valid priority value for both AMD and NVidia GPUs.
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}}, {{0}});
   std::vector<std::unique_ptr<Device>> devices;
   TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
       opts, kDeviceNamePrefix, &devices));
   EXPECT_EQ(1, devices.size());
   EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
-  EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+  EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
 }
 
 TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
+#if TENSORFLOW_USE_ROCM
+  // The valid range for priority values on AMD GPUs is [0, 2].
+  SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}});
+#else
+  // The valid range for priority values on NVidia GPUs is [-1, 0].
   SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}});
+#endif
   std::vector<std::unique_ptr<Device>> devices;
   TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
       opts, kDeviceNamePrefix, &devices));
   EXPECT_EQ(2, devices.size());
   EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
   EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
+#if TENSORFLOW_USE_ROCM
+  EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+  EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+#else
   EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
   EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+#endif
   ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size());
   ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size());
   EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id());
@@ -292,7 +329,8 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) {
 TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
   {
     // Multiple virtual devices with fewer priorities.
-    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1}});
+    // 0 is a valid priority value for both AMD and NVidia GPUs.
+    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0}});
     std::vector<std::unique_ptr<Device>> devices;
     Status status = DeviceFactory::GetFactory("GPU")->CreateDevices(
         opts, kDeviceNamePrefix, &devices);
@@ -305,16 +343,27 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) {
   }
   {
     // Multiple virtual devices with matching priority.
+#if TENSORFLOW_USE_ROCM
+    // The valid range for priority values on AMD GPUs is [0, 2].
+    SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}});
+#else
+    // The valid range for priority values on NVidia GPUs is [-1, 0].
     SessionOptions opts =
         MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}});
+#endif
     std::vector<std::unique_ptr<Device>> devices;
     TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices(
         opts, kDeviceNamePrefix, &devices));
     EXPECT_EQ(2, devices.size());
     EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit());
     EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit());
+#if TENSORFLOW_USE_ROCM
+    EXPECT_EQ(2, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
+    EXPECT_EQ(1, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+#else
     EXPECT_EQ(-1, static_cast<BaseGPUDevice*>(devices[0].get())->priority());
     EXPECT_EQ(0, static_cast<BaseGPUDevice*>(devices[1].get())->priority());
+#endif
   }
 }
 
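
The tests construct their configurations through the MakeSessionOptions helper. For orientation, the snippet below is roughly what such a configuration looks like when built by hand against the GPUOptions fields in tensorflow/core/protobuf/config.proto (a hedged sketch for illustration, not code from this patch or from the helper itself):

    // Split physical GPU 0 into two virtual devices, each with a memory
    // limit in MB and, with this change, a per-device stream priority.
    tensorflow::SessionOptions opts;
    tensorflow::GPUOptions* gpu_opts = opts.config.mutable_gpu_options();
    gpu_opts->set_visible_device_list("0");
    auto* vdevs = gpu_opts->mutable_experimental()->add_virtual_devices();
    vdevs->add_memory_limit_mb(123);
    vdevs->add_memory_limit_mb(456);
    vdevs->add_priority(0);  // valid on both CUDA ([-1, 0]) and ROCm ([0, 2])
    vdevs->add_priority(1);  // valid on ROCm, rejected on CUDA
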
diff --git a/tensorflow/stream_executor/rocm/rocm_driver.cc b/tensorflow/stream_executor/rocm/rocm_driver.cc
index 5a8154f1df8..a070979e71d 100644
--- a/tensorflow/stream_executor/rocm/rocm_driver.cc
+++ b/tensorflow/stream_executor/rocm/rocm_driver.cc
@@ -560,14 +560,15 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
 
 /* static */ bool GpuDriver::CreateStream(GpuContext* context,
                                           GpuStreamHandle* stream,
                                           int priority) {
-  if (priority != 0) {
-    LOG(ERROR) << "ROCM stream doesn't support priority. "
-               << " Should be set to 0 but given: " << priority;
-    return false;
-  }
   ScopedActivateContext activated{context};
-  hipError_t res = tensorflow::wrap::hipStreamCreateWithFlags(
-      stream, hipStreamDefault);  // switch to hipStreamNonBlocking?
+  hipError_t res;
+  if (priority == 0) {
+    res = tensorflow::wrap::hipStreamCreateWithFlags(
+        stream, hipStreamDefault);  // switch to hipStreamNonBlocking?
+  } else {
+    res = tensorflow::wrap::hipStreamCreateWithPriority(
+        stream, hipStreamDefault, priority);  // switch to hipStreamNonBlocking?
+  }
   if (res != hipSuccess) {
     LOG(ERROR) << "could not allocate ROCM stream for device "
                << context->device_ordinal() << ": " << ToString(res);
diff --git a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h
index bc5b6a87888..e09f7eb0eb2 100644
--- a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h
+++ b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h
@@ -117,6 +117,7 @@ namespace wrap {
   __macro(hipSetDevice)                    \
   __macro(hipStreamAddCallback)            \
   __macro(hipStreamCreateWithFlags)        \
+  __macro(hipStreamCreateWithPriority)     \
   __macro(hipStreamDestroy)                \
   __macro(hipStreamQuery)                  \
   __macro(hipStreamSynchronize)            \
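
With the new wrapper entry in place, CreateStream can forward nonzero priorities to hipStreamCreateWithPriority instead of failing. A minimal standalone sketch of that call path (assumes a ROCm install; the HIP runtime, like CUDA's, clamps out-of-range priorities rather than rejecting them, which is one reason TensorFlow validates requested priorities up front):

    #include <hip/hip_runtime.h>

    #include <cstdio>

    int main() {
      hipStream_t stream = nullptr;
      // Priority 1 is assumed to lie inside the device's reported range;
      // see the hipDeviceGetStreamPriorityRange sketch above.
      hipError_t res =
          hipStreamCreateWithPriority(&stream, hipStreamDefault, 1);
      if (res != hipSuccess) {
        std::fprintf(stderr, "hipStreamCreateWithPriority failed: %s\n",
                     hipGetErrorString(res));
        return 1;
      }
      std::printf("created stream with priority 1\n");
      hipStreamDestroy(stream);
      return 0;
    }
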