adding calls to the newer hipMemset APIs
This commit is contained in:
parent
699ff43685
commit
1ef3d8eb92
@ -500,7 +500,7 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
|
|||||||
hipDeviceptr_t location,
|
hipDeviceptr_t location,
|
||||||
uint8 value, size_t size) {
|
uint8 value, size_t size) {
|
||||||
ScopedActivateContext activation{context};
|
ScopedActivateContext activation{context};
|
||||||
hipError_t res = tensorflow::wrap::hipMemset(location, value, size);
|
hipError_t res = tensorflow::wrap::hipMemsetD8(location, value, size);
|
||||||
if (res != hipSuccess) {
|
if (res != hipSuccess) {
|
||||||
LOG(ERROR) << "failed to memset memory: " << ToString(res);
|
LOG(ERROR) << "failed to memset memory: " << ToString(res);
|
||||||
return false;
|
return false;
|
||||||
@ -514,15 +514,7 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
|
|||||||
size_t uint32_count) {
|
size_t uint32_count) {
|
||||||
ScopedActivateContext activation{context};
|
ScopedActivateContext activation{context};
|
||||||
void* pointer = absl::bit_cast<void*>(location);
|
void* pointer = absl::bit_cast<void*>(location);
|
||||||
unsigned char valueC = static_cast<unsigned char>(value);
|
hipError_t res = tensorflow::wrap::hipMemsetD32(pointer, value, uint32_count);
|
||||||
uint32_t value32 = (valueC << 24) | (valueC << 16) | (valueC << 8) | (valueC);
|
|
||||||
if (value32 != value) {
|
|
||||||
// mismatch indicates case where hipMemsetAsync can't emulate hipMemsetD32
|
|
||||||
LOG(ERROR) << "failed to memset memory";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
hipError_t res = tensorflow::wrap::hipMemset(pointer, static_cast<int>(value),
|
|
||||||
uint32_count * 4);
|
|
||||||
if (res != hipSuccess) {
|
if (res != hipSuccess) {
|
||||||
LOG(ERROR) << "failed to memset memory: " << ToString(res);
|
LOG(ERROR) << "failed to memset memory: " << ToString(res);
|
||||||
return false;
|
return false;
|
||||||
@ -553,17 +545,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
|
|||||||
GpuStreamHandle stream) {
|
GpuStreamHandle stream) {
|
||||||
ScopedActivateContext activation{context};
|
ScopedActivateContext activation{context};
|
||||||
void* pointer = absl::bit_cast<void*>(location);
|
void* pointer = absl::bit_cast<void*>(location);
|
||||||
|
hipError_t res =
|
||||||
// FIXME - need to set a 32-bit value here
|
tensorflow::wrap::hipMemsetD32Async(pointer, value, uint32_count, stream);
|
||||||
unsigned char valueC = static_cast<unsigned char>(value);
|
|
||||||
uint32_t value32 = (valueC << 24) | (valueC << 16) | (valueC << 8) | (valueC);
|
|
||||||
if (value32 != value) {
|
|
||||||
// mismatch indicates case where hipMemsetAsync can't emulate hipMemsetD32
|
|
||||||
LOG(ERROR) << "failed to memset memory";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
hipError_t res = tensorflow::wrap::hipMemsetAsync(pointer, value,
|
|
||||||
uint32_count * 4, stream);
|
|
||||||
if (res != hipSuccess) {
|
if (res != hipSuccess) {
|
||||||
LOG(ERROR) << "failed to enqueue async memset operation: " << ToString(res);
|
LOG(ERROR) << "failed to enqueue async memset operation: " << ToString(res);
|
||||||
return false;
|
return false;
|
||||||
|
@ -108,7 +108,10 @@ namespace wrap {
|
|||||||
__macro(hipMemcpyHtoD) \
|
__macro(hipMemcpyHtoD) \
|
||||||
__macro(hipMemcpyHtoDAsync) \
|
__macro(hipMemcpyHtoDAsync) \
|
||||||
__macro(hipMemset) \
|
__macro(hipMemset) \
|
||||||
|
__macro(hipMemsetD32) \
|
||||||
|
__macro(hipMemsetD8) \
|
||||||
__macro(hipMemsetAsync) \
|
__macro(hipMemsetAsync) \
|
||||||
|
__macro(hipMemsetD32Async) \
|
||||||
__macro(hipModuleGetFunction) \
|
__macro(hipModuleGetFunction) \
|
||||||
__macro(hipModuleGetGlobal) \
|
__macro(hipModuleGetGlobal) \
|
||||||
__macro(hipModuleLaunchKernel) \
|
__macro(hipModuleLaunchKernel) \
|
||||||
|
Loading…
Reference in New Issue
Block a user