Replace remaining references to GetCudaStream with GetGpuStream.

PiperOrigin-RevId: 255935906
This commit is contained in:
A. Unique TensorFlower 2019-07-01 05:29:47 -07:00 committed by TensorFlower Gardener
parent 3840910209
commit 108af9109e
5 changed files with 4 additions and 16 deletions

View File

@ -336,7 +336,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
Tensor* indices_out, DoneCallback done) {
int32 N = partitions->NumElements();
const GPUDevice& device = c->eigen_device<GPUDevice>();
const auto& cu_stream = GetCudaStream(c);
const auto& cu_stream = GetGpuStream(c);
// Initialize the indices_in tensor using the Range GPU kernel.
RangeInit(device, 0, 1, N, indices_in->flat<int32>());
@ -369,7 +369,7 @@ class DynamicPartitionOpGPU : public AsyncOpKernel {
Tensor* partition_count, Tensor* indices_out,
DoneCallback done) {
const GPUDevice& device = c->eigen_device<GPUDevice>();
const auto& cu_stream = GetCudaStream(c);
const auto& cu_stream = GetGpuStream(c);
int32 N = partitions->NumElements();
Tensor indices_in;
Tensor partitions_out;

View File

@ -381,7 +381,7 @@ class NonMaxSuppressionV2GPUOp : public OpKernel {
}
const int output_size = max_output_size.scalar<int>()();
size_t cub_sort_temp_storage_bytes = 0;
auto cuda_stream = GetCudaStream(context);
auto cuda_stream = GetGpuStream(context);
auto device = context->eigen_gpu_device();
// Calling cub with nullptrs as inputs will make it return
// workspace size needed for the operation instead of doing the operation.

View File

@ -148,7 +148,7 @@ class SoftmaxOpGPU : public OpKernel {
OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
{0}, 0, logits_in_.shape(), &softmax_out));
const auto& cu_stream = GetCudaStream(context);
const auto& cu_stream = GetGpuStream(context);
if (logits_in_.NumElements() > 0) {
Tensor max_logits;
Tensor sum_probs;

View File

@ -80,7 +80,6 @@ inline const char* GpuGetErrorString(hipError_t error) {
}
#endif
// Exact copy from GetCudaStream() in gpu_launch_config.h
// Returns a raw reference to the current cuda stream. Required by a
// number of kernel calls (for which StreamInterface* does not work),
// i.e. CUB and certain cublas primitives.

View File

@ -374,17 +374,6 @@ Gpu2DLaunchConfig GetGpu2DLaunchConfig(int xdim, int ydim,
CREATE_CUDA_HOST_FUNCTION_ALIAS(GetGpu2DLaunchConfig, GetCuda2DLaunchConfig);
#if GOOGLE_CUDA
// Returns a raw reference to the current cuda stream. Required by a
// number of kernel calls (for which StreamInterface* does not work), i.e.
// CUB and certain cublas primitives.
inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
const cudaStream_t* ptr = CHECK_NOTNULL(
reinterpret_cast<const cudaStream_t*>(context->op_device_context()
->stream()
->implementation()
->GpuStreamMemberHack()));
return *ptr;
}
template <typename DeviceFunc>
Cuda2DLaunchConfig GetCuda2DLaunchConfig(int xdim, int ydim,
const Eigen::GpuDevice& d,