Add calls to the newer hipMemset APIs (hipMemsetD8, hipMemsetD32, hipMemsetD32Async)

This commit is contained in:
Deven Desai 2019-03-08 18:16:07 +00:00
parent 699ff43685
commit 1ef3d8eb92
2 changed files with 7 additions and 21 deletions

View File

@ -500,7 +500,7 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
hipDeviceptr_t location, hipDeviceptr_t location,
uint8 value, size_t size) { uint8 value, size_t size) {
ScopedActivateContext activation{context}; ScopedActivateContext activation{context};
hipError_t res = tensorflow::wrap::hipMemset(location, value, size); hipError_t res = tensorflow::wrap::hipMemsetD8(location, value, size);
if (res != hipSuccess) { if (res != hipSuccess) {
LOG(ERROR) << "failed to memset memory: " << ToString(res); LOG(ERROR) << "failed to memset memory: " << ToString(res);
return false; return false;
@ -514,15 +514,7 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
size_t uint32_count) { size_t uint32_count) {
ScopedActivateContext activation{context}; ScopedActivateContext activation{context};
void* pointer = absl::bit_cast<void*>(location); void* pointer = absl::bit_cast<void*>(location);
unsigned char valueC = static_cast<unsigned char>(value); hipError_t res = tensorflow::wrap::hipMemsetD32(pointer, value, uint32_count);
uint32_t value32 = (valueC << 24) | (valueC << 16) | (valueC << 8) | (valueC);
if (value32 != value) {
// mismatch indicates a case where hipMemset can't emulate hipMemsetD32
LOG(ERROR) << "failed to memset memory";
return false;
}
hipError_t res = tensorflow::wrap::hipMemset(pointer, static_cast<int>(value),
uint32_count * 4);
if (res != hipSuccess) { if (res != hipSuccess) {
LOG(ERROR) << "failed to memset memory: " << ToString(res); LOG(ERROR) << "failed to memset memory: " << ToString(res);
return false; return false;
@ -553,17 +545,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
GpuStreamHandle stream) { GpuStreamHandle stream) {
ScopedActivateContext activation{context}; ScopedActivateContext activation{context};
void* pointer = absl::bit_cast<void*>(location); void* pointer = absl::bit_cast<void*>(location);
hipError_t res =
// FIXME - need to set a 32-bit value here tensorflow::wrap::hipMemsetD32Async(pointer, value, uint32_count, stream);
unsigned char valueC = static_cast<unsigned char>(value);
uint32_t value32 = (valueC << 24) | (valueC << 16) | (valueC << 8) | (valueC);
if (value32 != value) {
// mismatch indicates a case where hipMemsetAsync can't emulate hipMemsetD32
LOG(ERROR) << "failed to memset memory";
return false;
}
hipError_t res = tensorflow::wrap::hipMemsetAsync(pointer, value,
uint32_count * 4, stream);
if (res != hipSuccess) { if (res != hipSuccess) {
LOG(ERROR) << "failed to enqueue async memset operation: " << ToString(res); LOG(ERROR) << "failed to enqueue async memset operation: " << ToString(res);
return false; return false;

View File

@ -108,7 +108,10 @@ namespace wrap {
__macro(hipMemcpyHtoD) \ __macro(hipMemcpyHtoD) \
__macro(hipMemcpyHtoDAsync) \ __macro(hipMemcpyHtoDAsync) \
__macro(hipMemset) \ __macro(hipMemset) \
__macro(hipMemsetD32) \
__macro(hipMemsetD8) \
__macro(hipMemsetAsync) \ __macro(hipMemsetAsync) \
__macro(hipMemsetD32Async) \
__macro(hipModuleGetFunction) \ __macro(hipModuleGetFunction) \
__macro(hipModuleGetGlobal) \ __macro(hipModuleGetGlobal) \
__macro(hipModuleLaunchKernel) \ __macro(hipModuleLaunchKernel) \