Replace remaining references to GetCudaStream with GetGpuStream.
PiperOrigin-RevId: 255935906
This commit is contained in:
parent
3840910209
commit
108af9109e
@ -336,7 +336,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
|
||||
Tensor* indices_out, DoneCallback done) {
|
||||
int32 N = partitions->NumElements();
|
||||
const GPUDevice& device = c->eigen_device<GPUDevice>();
|
||||
const auto& cu_stream = GetCudaStream(c);
|
||||
const auto& cu_stream = GetGpuStream(c);
|
||||
|
||||
// Initialize the indices_in tensor using the Range GPU kernel.
|
||||
RangeInit(device, 0, 1, N, indices_in->flat<int32>());
|
||||
@ -369,7 +369,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
|
||||
Tensor* partition_count, Tensor* indices_out,
|
||||
DoneCallback done) {
|
||||
const GPUDevice& device = c->eigen_device<GPUDevice>();
|
||||
const auto& cu_stream = GetCudaStream(c);
|
||||
const auto& cu_stream = GetGpuStream(c);
|
||||
int32 N = partitions->NumElements();
|
||||
Tensor indices_in;
|
||||
Tensor partitions_out;
|
||||
|
@ -381,7 +381,7 @@ class NonMaxSuppressionV2GPUOp : public OpKernel {
|
||||
}
|
||||
const int output_size = max_output_size.scalar<int>()();
|
||||
size_t cub_sort_temp_storage_bytes = 0;
|
||||
auto cuda_stream = GetCudaStream(context);
|
||||
auto cuda_stream = GetGpuStream(context);
|
||||
auto device = context->eigen_gpu_device();
|
||||
// Calling cub with nullptrs as inputs will make it return
|
||||
// workspace size needed for the operation instead of doing the operation.
|
||||
|
@ -148,7 +148,7 @@ class SoftmaxOpGPU : public OpKernel {
|
||||
OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
|
||||
{0}, 0, logits_in_.shape(), &softmax_out));
|
||||
|
||||
const auto& cu_stream = GetCudaStream(context);
|
||||
const auto& cu_stream = GetGpuStream(context);
|
||||
if (logits_in_.NumElements() > 0) {
|
||||
Tensor max_logits;
|
||||
Tensor sum_probs;
|
||||
|
@ -80,7 +80,6 @@ inline const char* GpuGetErrorString(hipError_t error) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Exact copy from GetCudaStream() in gpu_launch_config.h
|
||||
// Returns a raw reference to the current cuda stream. Required by a
|
||||
// number of kernel calls (for which StreamInterface* does not work),
|
||||
// i.e. CUB and certain cublas primitives.
|
||||
|
@ -374,17 +374,6 @@ Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
|
||||
CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpu2DLaunchConfig, GetCuda2DLaunchConfig);
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
// Returns a raw reference to the current cuda stream. Required by a
|
||||
// number of kernel calls (for which StreamInterface* does not work), i.e.
|
||||
// CUB and certain cublas primitives.
|
||||
inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
|
||||
const cudaStream_t* ptr = CHECK_NOTNULL(
|
||||
reinterpret_cast<const cudaStream_t*>(context->op_device_context()
|
||||
->stream()
|
||||
->implementation()
|
||||
->GpuStreamMemberHack()));
|
||||
return *ptr;
|
||||
}
|
||||
template <typename DeviceFunc>
|
||||
Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
|
||||
const Eigen::GpuDevice& d,
|
||||
|
Loading…
Reference in New Issue
Block a user