diff --git a/tensorflow/stream_executor/gpu/gpu_types.h b/tensorflow/stream_executor/gpu/gpu_types.h index c69177d0760..64a6e5e5efc 100644 --- a/tensorflow/stream_executor/gpu/gpu_types.h +++ b/tensorflow/stream_executor/gpu/gpu_types.h @@ -20,6 +20,8 @@ limitations under the License. #if TENSORFLOW_USE_ROCM +#define __HIP_DISABLE_CPP_FUNCTIONS__ + #include "rocm/include/hip/hip_complex.h" #include "rocm/include/hip/hip_runtime.h" #include "rocm/include/hiprand/hiprand.h" diff --git a/tensorflow/stream_executor/rocm/rocm_driver.cc b/tensorflow/stream_executor/rocm/rocm_driver.cc index 1b0b91426aa..94feef06d86 100644 --- a/tensorflow/stream_executor/rocm/rocm_driver.cc +++ b/tensorflow/stream_executor/rocm/rocm_driver.cc @@ -671,7 +671,7 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { uint64 bytes) { ScopedActivateContext activated{context}; hipDeviceptr_t result = 0; - hipError_t res = tensorflow::wrap::hipMallocVanilla(&result, bytes); + hipError_t res = tensorflow::wrap::hipMalloc(&result, bytes); if (res != hipSuccess) { LOG(ERROR) << "failed to allocate " << port::HumanReadableNumBytes::ToString(bytes) << " (" << bytes @@ -717,7 +717,7 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { ScopedActivateContext activation{context}; void* host_mem = nullptr; // "Portable" memory is visible to all ROCM contexts. Safe for our use model. - hipError_t res = tensorflow::wrap::hipHostMallocVanilla( + hipError_t res = tensorflow::wrap::hipHostMalloc( &host_mem, bytes, hipHostMallocPortable); if (res != hipSuccess) { LOG(ERROR) << "failed to alloc " << bytes diff --git a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h index 27495c2cbc0..c855bfb36a8 100644 --- a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h +++ b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h @@ -20,6 +20,8 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_DRIVER_WRAPPER_H_ #define TENSORFLOW_STREAM_EXECUTOR_ROCM_ROCM_DRIVER_WRAPPER_H_ +#define __HIP_DISABLE_CPP_FUNCTIONS__ + #include "rocm/include/hip/hip_runtime.h" #include "tensorflow/stream_executor/lib/env.h" #include "tensorflow/stream_executor/platform/dso_loader.h" @@ -48,21 +50,6 @@ namespace wrap { #define TO_STR_(x) #x #define TO_STR(x) TO_STR_(x) -// hipMalloc and hipHostMalloc are defined as funtion templates in the -// HIP header files, and hence their names get mangled and the attempt -// to resolve their name when trying to dynamically load them will fail -// Updating the HIP header files to make them C functions is underway. -// Until that change flows through, we will workaround the issue by -// creating dummy wrappers for them here - -hipError_t hipMallocVanilla(void** ptr, size_t size) { - return hipErrorNotInitialized; -} - -hipError_t hipHostMallocVanilla(void** ptr, size_t size, unsigned int flags) { - return hipErrorNotInitialized; -} - #define STREAM_EXECUTOR_HIP_WRAP(hipSymbolName) \ template \ auto hipSymbolName(Args... args)->decltype(::hipSymbolName(args...)) { \ @@ -107,9 +94,11 @@ hipError_t hipHostMallocVanilla(void** ptr, size_t size, unsigned int flags) { __macro(hipGetDeviceCount) \ __macro(hipGetDeviceProperties) \ __macro(hipHostFree) \ + __macro(hipHostMalloc) \ __macro(hipHostRegister) \ __macro(hipHostUnregister) \ __macro(hipInit) \ + __macro(hipMalloc) \ __macro(hipMemGetAddressRange) \ __macro(hipMemGetInfo) \ __macro(hipMemcpyDtoD) \