From f046d2f2141459827206068462608cf0728d194e Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Wed, 9 Sep 2020 20:51:11 -0700
Subject: [PATCH] Roll forward the original CL.

PiperOrigin-RevId: 330857639
Change-Id: I1d6da52a031deb5962d472e800649f4d5d14c6d9
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc |  102 +-
 tensorflow/stream_executor/cuda/cuda_dnn.h  |   40 +-
 tensorflow/stream_executor/dnn.h            |   25 +-
 tensorflow/stream_executor/rocm/rocm_dnn.cc |   20 +-
 tensorflow/stream_executor/rocm/rocm_dnn.h  |   38 +-
 tensorflow/stream_executor/stream.cc        | 2029 +++++++++----------
 tensorflow/stream_executor/stream.h         |    9 +-
 7 files changed, 1082 insertions(+), 1181 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 3b3323b4f42..ac7aac9b105 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -3703,7 +3703,7 @@ port::Status CudnnSupport::DoBatchNormalizationBackwardImpl(
   return port::Status::OK();
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<double>& conv_input_data, double conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3716,18 +3716,16 @@ bool CudnnSupport::DoFusedConvolve(
     DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kDouble), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kDouble), scratch_allocator,
+      algorithm_config, output_profile_result);
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<float>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3740,18 +3738,16 @@ bool CudnnSupport::DoFusedConvolve(
     DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kFloat), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kFloat),
scratch_allocator, + algorithm_config, output_profile_result); } -bool CudnnSupport::DoFusedConvolve( +port::Status CudnnSupport::DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3765,18 +3761,16 @@ bool CudnnSupport::DoFusedConvolve( DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return IsStatusOk( - DoFusedConvolveImpl( - stream, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output_data, - GetConvAccumulatorType(dnn::DataType::kHalf), scratch_allocator, - algorithm_config, output_profile_result), - /*report_error=*/!output_profile_result); + return DoFusedConvolveImpl( + stream, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output_data, + GetConvAccumulatorType(dnn::DataType::kHalf), scratch_allocator, + algorithm_config, output_profile_result); } -bool CudnnSupport::DoFusedConvolve( +port::Status CudnnSupport::DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3793,23 +3787,21 @@ bool CudnnSupport::DoFusedConvolve( std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream); if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) { - LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only " - "supported on GPUs with compute capability 6.1 or later."; - return false; + return port::UnimplementedError( + "cudnnConvolutionBiasActivationForward() for int8 is only supported on " + "GPUs with compute capability 6.1 or later."); } - return IsStatusOk( - DoFusedConvolveImpl( - stream, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output_data, - GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator, - algorithm_config, output_profile_result), - /*report_error=*/!output_profile_result); + return DoFusedConvolveImpl( + stream, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output_data, + GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator, + algorithm_config, output_profile_result); } -bool CudnnSupport::DoFusedConvolve( +port::Status CudnnSupport::DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3826,20 +3818,18 @@ bool CudnnSupport::DoFusedConvolve( stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) { - LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only " - "supported on GPUs with compute capability 6.1 or later."; - return false; + return 
port::UnimplementedError( + "cudnnConvolutionBiasActivationForward() for int8 is only supported on " + "GPUs with compute capability 6.1 or later."); } - return IsStatusOk( - DoFusedConvolveImpl( - stream, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output_data, - GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator, - algorithm_config, output_profile_result), - /*report_error=*/!output_profile_result); + return DoFusedConvolveImpl( + stream, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output_data, + GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator, + algorithm_config, output_profile_result); } port::Status CudnnSupport::DoPrepareForCtcLoss( diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index 181502e03ee..9cab982c9a1 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -277,7 +277,7 @@ class CudnnSupport : public dnn::DnnSupport { dnn::AlgorithmDesc algorithm_desc, DeviceMemory scratch_memory, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, double conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -291,7 +291,7 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -305,25 +305,23 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve(Stream* stream, - const dnn::BatchDescriptor& conv_input_descriptor, - const DeviceMemory& conv_input_data, - float conv_input_scale, - const dnn::FilterDescriptor& filter_descriptor, - const DeviceMemory& filter_data, - const dnn::ConvolutionDescriptor& convolution_descriptor, - const DeviceMemory& side_input_data, - float side_input_scale, - const dnn::BatchDescriptor& bias_descriptor, - const DeviceMemory& biases, - dnn::ActivationMode activation_mode, - const dnn::BatchDescriptor& output_descriptor, - DeviceMemory* output_data, - ScratchAllocator* scratch_allocator, - const dnn::AlgorithmConfig& algorithm_config, - dnn::ProfileResult* output_profile_result) override; + port::Status DoFusedConvolve( + Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, + const DeviceMemory& conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory& filter_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const DeviceMemory& side_input_data, float side_input_scale, + const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory& biases, + dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory* output_data, + ScratchAllocator* scratch_allocator, + const 
dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -337,7 +335,7 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 7b45ec2cc87..53cdff8cb7a 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -1163,7 +1163,7 @@ class DnnSupport { // that if the inverse of the filter is applied to the output in VALID mode // the result is the same size as the input - this requires even more // padding of the input. - virtual bool DoFusedConvolve( + virtual port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, double conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -1176,11 +1176,12 @@ class DnnSupport { DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return false; + return port::UnimplementedError( + "DnnSupport::DoFusedConvolve not implemented on this platform."); } // This is the float version of DoFusedConvolve. - virtual bool DoFusedConvolve( + virtual port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -1193,12 +1194,13 @@ class DnnSupport { DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return false; + return port::UnimplementedError( + "DnnSupport::DoFusedConvolve not implemented on this platform."); } // This is the Eigen::half version of DoFusedConvolve. // The scaling parameters are still floats. - virtual bool DoFusedConvolve( + virtual port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -1213,12 +1215,13 @@ class DnnSupport { ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return false; + return port::UnimplementedError( + "DnnSupport::DoFusedConvolve not implemented on this platform."); } // This is the int8 version of DoFusedConvolve. // The bias input and scaling parameters are floats. 
- virtual bool DoFusedConvolve( + virtual port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -1231,12 +1234,13 @@ class DnnSupport { DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return false; + return port::UnimplementedError( + "DnnSupport::DoFusedConvolve not implemented on this platform."); } // This is the int8 version of DoFusedConvolve. // The output, bias input and scaling parameters are floats. - virtual bool DoFusedConvolve( + virtual port::Status DoFusedConvolve( Stream* /*stream*/, const dnn::BatchDescriptor& /*conv_input_descriptor*/, const DeviceMemory& /*conv_input_data*/, float /*conv_input_scale*/, const dnn::FilterDescriptor& /*filter_descriptor*/, @@ -1252,7 +1256,8 @@ class DnnSupport { ScratchAllocator* /*scratch_allocator*/, const dnn::AlgorithmConfig& /*algorithm_config*/, dnn::ProfileResult* /*output_profile_result*/) { - return false; + return port::UnimplementedError( + "DnnSupport::DoFusedConvolve not implemented on this platform."); } template diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc index 4b2761e7658..80306105d4a 100644 --- a/tensorflow/stream_executor/rocm/rocm_dnn.cc +++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc @@ -3680,7 +3680,7 @@ bool MIOpenSupport::DoBatchNormalizationBackwardImpl( return true; } -bool MIOpenSupport::DoFusedConvolve( +port::Status MIOpenSupport::DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, double conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3693,11 +3693,10 @@ bool MIOpenSupport::DoFusedConvolve( DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - LOG(ERROR) << "fused convolve not implemented yet"; - return false; + return port::UnimplementedError("fused convolve not implemented yet"); } -bool MIOpenSupport::DoFusedConvolve( +port::Status MIOpenSupport::DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3710,11 +3709,10 @@ bool MIOpenSupport::DoFusedConvolve( DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - LOG(ERROR) << "fused convolve not implemented yet"; - return false; + return port::UnimplementedError("fused convolve not implemented yet"); } -bool MIOpenSupport::DoFusedConvolve( +port::Status MIOpenSupport::DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3728,11 +3726,10 @@ bool MIOpenSupport::DoFusedConvolve( DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - LOG(ERROR) << "fused convolve not implemented yet"; - return false; + return port::UnimplementedError("fused convolve not implemented yet"); } -bool MIOpenSupport::DoFusedConvolve( +port::Status MIOpenSupport::DoFusedConvolve( Stream* stream, 
const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -3745,8 +3742,7 @@ bool MIOpenSupport::DoFusedConvolve( DeviceMemory* output_data, ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - LOG(ERROR) << "fused convolve not implemented yet"; - return false; + return port::UnimplementedError("fused convolve not implemented yet"); } bool MIOpenSupport::DoTransformTensor(Stream* stream, diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.h b/tensorflow/stream_executor/rocm/rocm_dnn.h index b01c1cc5290..654a1bf8f3a 100644 --- a/tensorflow/stream_executor/rocm/rocm_dnn.h +++ b/tensorflow/stream_executor/rocm/rocm_dnn.h @@ -315,7 +315,7 @@ class MIOpenSupport : public dnn::DnnSupport { dnn::AlgorithmDesc algorithm_desc, DeviceMemory scratch_memory, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, double conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -329,7 +329,7 @@ class MIOpenSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, @@ -343,25 +343,23 @@ class MIOpenSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve(Stream* stream, - const dnn::BatchDescriptor& conv_input_descriptor, - const DeviceMemory& conv_input_data, - float conv_input_scale, - const dnn::FilterDescriptor& filter_descriptor, - const DeviceMemory& filter_data, - const dnn::ConvolutionDescriptor& convolution_descriptor, - const DeviceMemory& side_input_data, - float side_input_scale, - const dnn::BatchDescriptor& bias_descriptor, - const DeviceMemory& biases, - dnn::ActivationMode activation_mode, - const dnn::BatchDescriptor& output_descriptor, - DeviceMemory* output_data, - ScratchAllocator* scratch_allocator, - const dnn::AlgorithmConfig& algorithm_config, - dnn::ProfileResult* output_profile_result) override; + port::Status DoFusedConvolve( + Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, + const DeviceMemory& conv_input_data, float conv_input_scale, + const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory& filter_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const DeviceMemory& side_input_data, float side_input_scale, + const dnn::BatchDescriptor& bias_descriptor, + const DeviceMemory& biases, + dnn::ActivationMode activation_mode, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory* output_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) override; - bool DoFusedConvolve( + port::Status DoFusedConvolve( Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor, const DeviceMemory& conv_input_data, float conv_input_scale, const dnn::FilterDescriptor& filter_descriptor, diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 
505d54cf5bf..62689e61be1 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -251,7 +251,7 @@ Stream::Stream(StreamExecutor *parent) : parent_(parent), implementation_(parent->implementation()->GetStreamImplementation()), allocated_(false), - ok_(false), + status_(port::InternalError("Uninitialized stream")), temporary_memory_manager_(this) { VLOG_CALL(PARAM(parent)); } @@ -261,7 +261,7 @@ Stream::Stream(StreamExecutor *parent, : parent_(parent), implementation_(implementation), allocated_(false), - ok_(false), + status_(port::InternalError("Uninitialized stream")), temporary_memory_manager_(this) { VLOG_CALL(PARAM(parent), PARAM(implementation)); } @@ -300,12 +300,12 @@ Stream &Stream::Init() { absl::MutexLock lock(&mu_); CHECK_EQ(false, allocated_) << "stream appears to already have been initialized"; - CHECK(!ok_) << "stream should be in !ok() state pre-initialization"; + CHECK(!status_.ok()) << "stream should be in !ok() state pre-initialization"; if (parent_->AllocateStream(this)) { // Successful initialization! allocated_ = true; - ok_ = true; + status_ = port::Status::OK(); } else { LOG(ERROR) << "failed to allocate stream during initialization"; } @@ -316,11 +316,7 @@ Stream &Stream::Init() { Stream &Stream::InitTimer(Timer *timer) { VLOG_CALL(PARAM(timer)); - if (ok()) { CheckError(parent_->AllocateTimer(timer)); - } else { - LOG(INFO) << "did not allocate timer: " << timer; - } return *this; } @@ -359,17 +355,14 @@ Stream &Stream::ThenBatchNormalizationForward( ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(x), PARAM(scale), PARAM(offset), PARAM(x_desc), PARAM(scale_offset_desc), PARAM(epsilon), PARAM(y)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoBatchNormalizationForward( - this, x, scale, offset, estimated_mean, estimated_variance, - side_input, x_desc, scale_offset_desc, epsilon, - exponential_average_factor, activation_mode, y, batch_mean, batch_var, - saved_mean, saved_inv_var, is_training, reserve_space_allocator, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoBatchNormalizationForward( + this, x, scale, offset, estimated_mean, estimated_variance, side_input, + x_desc, scale_offset_desc, epsilon, exponential_average_factor, + activation_mode, y, batch_mean, batch_var, saved_mean, saved_inv_var, + is_training, reserve_space_allocator, workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -386,15 +379,13 @@ Stream &Stream::ThenBatchNormalizationBackward( VLOG_CALL(PARAM(y_backprop), PARAM(x), PARAM(scale), PARAM(x_desc), PARAM(scale_offset_desc), PARAM(epsilon), PARAM(x_backprop), PARAM(scale_backprop), PARAM(offset_backprop)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoBatchNormalizationBackward( - this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc, - epsilon, x_backprop, scale_backprop, offset_backprop, - reserve_space_data, workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoBatchNormalizationBackward( + this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc, + epsilon, x_backprop, scale_backprop, offset_backprop, + reserve_space_data, workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -415,17 +406,14 @@ Stream &Stream::ThenBatchNormalizationForward( 
ScratchAllocator *workspace_allocator) { VLOG_CALL(PARAM(x), PARAM(scale), PARAM(offset), PARAM(x_desc), PARAM(scale_offset_desc), PARAM(epsilon), PARAM(y)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoBatchNormalizationForward( - this, x, scale, offset, estimated_mean, estimated_variance, - side_input, x_desc, scale_offset_desc, epsilon, - exponential_average_factor, activation_mode, y, batch_mean, batch_var, - saved_mean, saved_inv_var, is_training, reserve_space_allocator, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoBatchNormalizationForward( + this, x, scale, offset, estimated_mean, estimated_variance, side_input, + x_desc, scale_offset_desc, epsilon, exponential_average_factor, + activation_mode, y, batch_mean, batch_var, saved_mean, saved_inv_var, + is_training, reserve_space_allocator, workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -443,16 +431,14 @@ Stream &Stream::ThenBatchNormalizationBackward( VLOG_CALL(PARAM(y_backprop), PARAM(x), PARAM(scale), PARAM(x_desc), PARAM(scale_offset_desc), PARAM(epsilon), PARAM(x_backprop), PARAM(scale_backprop), PARAM(offset_backprop)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoBatchNormalizationBackward( - this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc, - epsilon, x_backprop, scale_backprop, offset_backprop, - reserve_space_data, workspace_allocator)); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoBatchNormalizationBackward( + this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc, + epsilon, x_backprop, scale_backprop, offset_backprop, + reserve_space_data, workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -477,20 +463,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm( PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoFusedConvolve( - this, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output, scratch_allocator, - algorithm_config, output_profile_result); - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output, scratch_allocator, algorithm_config, + output_profile_result); + if (!status.ok() && !output_profile_result) { + CheckStatus(status); } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -515,20 +499,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm( PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoFusedConvolve( - this, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, 
biases, - activation_mode, output_descriptor, output, scratch_allocator, - algorithm_config, output_profile_result); - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output, scratch_allocator, algorithm_config, + output_profile_result); + if (!status.ok() && !output_profile_result) { + CheckStatus(status); } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -554,20 +536,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm( PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoFusedConvolve( - this, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output, scratch_allocator, - algorithm_config, output_profile_result); - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output, scratch_allocator, algorithm_config, + output_profile_result); + if (!status.ok() && !output_profile_result) { + CheckStatus(status); } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -592,20 +572,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm( PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoFusedConvolve( - this, conv_input_descriptor, conv_input_data, conv_input_scale, - filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output, scratch_allocator, - algorithm_config, output_profile_result); - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output, scratch_allocator, algorithm_config, + output_profile_result); + if (!status.ok() && !output_profile_result) { + CheckStatus(status); } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -630,20 +608,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm( PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - auto status = dnn->DoFusedConvolve( - this, conv_input_descriptor, conv_input_data, conv_input_scale, - 
filter_descriptor, filter_data, convolution_descriptor, - side_input_data, side_input_scale, bias_descriptor, biases, - activation_mode, output_descriptor, output, scratch_allocator, - algorithm_config, output_profile_result); - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoFusedConvolve( + this, conv_input_descriptor, conv_input_data, conv_input_scale, + filter_descriptor, filter_data, convolution_descriptor, side_input_data, + side_input_scale, bias_descriptor, biases, activation_mode, + output_descriptor, output, scratch_allocator, algorithm_config, + output_profile_result); + if (!status.ok() && !output_profile_result) { + CheckStatus(status); } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -663,29 +639,27 @@ Stream &Stream::ThenConvolveWithAlgorithm( PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::FORWARD, this, input_descriptor, - input_data, filter_descriptor, filter_data, output_descriptor, - *output, convolution_descriptor, algorithm_config, - scratch_allocator, &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, output_descriptor, output, algorithm_desc, - &scratch_memory, output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::FORWARD, this, input_descriptor, + input_data, filter_descriptor, filter_data, output_descriptor, + *output, convolution_descriptor, algorithm_config, + scratch_allocator, &algorithm_desc, &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, algorithm_desc, + &scratch_memory, output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -705,29 +679,27 @@ Stream &Stream::ThenConvolveWithAlgorithm( PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::FORWARD, this, input_descriptor, - input_data, filter_descriptor, filter_data, output_descriptor, - *output, convolution_descriptor, algorithm_config, - scratch_allocator, &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, output_descriptor, output, algorithm_desc, - &scratch_memory, output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto 
status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::FORWARD, this, input_descriptor, + input_data, filter_descriptor, filter_data, output_descriptor, + *output, convolution_descriptor, algorithm_config, + scratch_allocator, &algorithm_desc, &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, algorithm_desc, + &scratch_memory, output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -747,29 +719,27 @@ Stream &Stream::ThenConvolveWithAlgorithm( PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::FORWARD, this, input_descriptor, - input_data, filter_descriptor, filter_data, output_descriptor, - *output, convolution_descriptor, algorithm_config, - scratch_allocator, &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, output_descriptor, output, algorithm_desc, - &scratch_memory, output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::FORWARD, this, input_descriptor, + input_data, filter_descriptor, filter_data, output_descriptor, + *output, convolution_descriptor, algorithm_config, + scratch_allocator, &algorithm_desc, &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, algorithm_desc, + &scratch_memory, output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -789,29 +759,27 @@ Stream &Stream::ThenConvolveWithAlgorithm( PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::FORWARD, this, input_descriptor, - input_data, filter_descriptor, filter_data, output_descriptor, - *output, convolution_descriptor, algorithm_config, - scratch_allocator, &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, output_descriptor, output, algorithm_desc, - &scratch_memory, output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::FORWARD, this, input_descriptor, + input_data, filter_descriptor, filter_data, output_descriptor, + *output, convolution_descriptor, algorithm_config, + 
scratch_allocator, &algorithm_desc, &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, algorithm_desc, + &scratch_memory, output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -831,29 +799,27 @@ Stream &Stream::ThenConvolveWithAlgorithm( PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::FORWARD, this, input_descriptor, - input_data, filter_descriptor, filter_data, output_descriptor, - *output, convolution_descriptor, algorithm_config, - scratch_allocator, &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolve( - this, input_descriptor, input_data, filter_descriptor, filter_data, - convolution_descriptor, output_descriptor, output, algorithm_desc, - &scratch_memory, output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::FORWARD, this, input_descriptor, + input_data, filter_descriptor, filter_data, output_descriptor, + *output, convolution_descriptor, algorithm_config, + scratch_allocator, &algorithm_desc, &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, algorithm_desc, + &scratch_memory, output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -887,18 +853,15 @@ Stream &Stream::ThenConvolveQuantized( PARAM(coefficient_scales), PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoConvolveQuantized( - this, input_descriptor, input_data, filter_descriptor, - filter_coefficients, coefficient_scales, convolution_descriptor, - output_descriptor, output)); - } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveQuantized( + this, input_descriptor, input_data, filter_descriptor, + filter_coefficients, coefficient_scales, convolution_descriptor, + output_descriptor, output)); + } else { + SetError(); + LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; } return *this; } @@ -917,18 +880,15 @@ Stream &Stream::ThenConvolveQuantized( PARAM(coefficient_scales), PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoConvolveQuantized( - this, input_descriptor, input_data, filter_descriptor, - filter_coefficients, coefficient_scales, convolution_descriptor, - output_descriptor, output)); - } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without 
DNN support"; - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveQuantized( + this, input_descriptor, input_data, filter_descriptor, + filter_coefficients, coefficient_scales, convolution_descriptor, + output_descriptor, output)); + } else { + SetError(); + LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; } return *this; } @@ -947,15 +907,13 @@ Stream &Stream::ThenSeparableConvolve( PARAM(depth_multiplier), PARAM(first_weights), PARAM(second_weights), PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoSeparableConvolve( - this, batch_descriptor, input_data, filter_descriptor, - depth_multiplier, first_weights, second_weights, - convolution_descriptor, output_descriptor, output)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoSeparableConvolve( + this, batch_descriptor, input_data, filter_descriptor, depth_multiplier, + first_weights, second_weights, convolution_descriptor, + output_descriptor, output)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -976,31 +934,29 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( PARAM(convolution_descriptor), PARAM(input_descriptor), PARAM(backward_input_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor, - *backward_input_data, filter_descriptor, filter_data, - output_descriptor, backward_output_data, - convolution_descriptor, algorithm_config, scratch_allocator, - &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolveBackwardData( - this, filter_descriptor, filter_data, output_descriptor, - backward_output_data, convolution_descriptor, input_descriptor, - backward_input_data, algorithm_desc, &scratch_memory, - output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor, + *backward_input_data, filter_descriptor, filter_data, + output_descriptor, backward_output_data, convolution_descriptor, + algorithm_config, scratch_allocator, &algorithm_desc, + &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolveBackwardData( + this, filter_descriptor, filter_data, output_descriptor, + backward_output_data, convolution_descriptor, input_descriptor, + backward_input_data, algorithm_desc, &scratch_memory, + output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1021,31 +977,29 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( PARAM(convolution_descriptor), PARAM(input_descriptor), PARAM(backward_input_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor, - *backward_input_data, filter_descriptor, filter_data, - output_descriptor, backward_output_data, - 
convolution_descriptor, algorithm_config, scratch_allocator, - &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolveBackwardData( - this, filter_descriptor, filter_data, output_descriptor, - backward_output_data, convolution_descriptor, input_descriptor, - backward_input_data, algorithm_desc, &scratch_memory, - output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor, + *backward_input_data, filter_descriptor, filter_data, + output_descriptor, backward_output_data, convolution_descriptor, + algorithm_config, scratch_allocator, &algorithm_desc, + &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolveBackwardData( + this, filter_descriptor, filter_data, output_descriptor, + backward_output_data, convolution_descriptor, input_descriptor, + backward_input_data, algorithm_desc, &scratch_memory, + output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1066,31 +1020,29 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( PARAM(convolution_descriptor), PARAM(input_descriptor), PARAM(backward_input_data), PARAM(algorithm_config)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor, - *backward_input_data, filter_descriptor, filter_data, - output_descriptor, backward_output_data, - convolution_descriptor, algorithm_config, scratch_allocator, - &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolveBackwardData( - this, filter_descriptor, filter_data, output_descriptor, - backward_output_data, convolution_descriptor, input_descriptor, - backward_input_data, algorithm_desc, &scratch_memory, - output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor, + *backward_input_data, filter_descriptor, filter_data, + output_descriptor, backward_output_data, convolution_descriptor, + algorithm_config, scratch_allocator, &algorithm_desc, + &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolveBackwardData( + this, filter_descriptor, filter_data, output_descriptor, + backward_output_data, convolution_descriptor, input_descriptor, + backward_input_data, algorithm_desc, &scratch_memory, + output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1111,31 +1063,29 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( PARAM(convolution_descriptor), PARAM(filter_descriptor), PARAM(backward_filter_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor, - input_data, 
filter_descriptor, *backward_filter_data, - output_descriptor, backward_output_data, - convolution_descriptor, algorithm_config, scratch_allocator, - &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolveBackwardFilter( - this, input_descriptor, input_data, output_descriptor, - backward_output_data, convolution_descriptor, filter_descriptor, - backward_filter_data, algorithm_desc, &scratch_memory, - output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor, + input_data, filter_descriptor, *backward_filter_data, + output_descriptor, backward_output_data, convolution_descriptor, + algorithm_config, scratch_allocator, &algorithm_desc, + &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolveBackwardFilter( + this, input_descriptor, input_data, output_descriptor, + backward_output_data, convolution_descriptor, filter_descriptor, + backward_filter_data, algorithm_desc, &scratch_memory, + output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1156,31 +1106,29 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( PARAM(convolution_descriptor), PARAM(filter_descriptor), PARAM(backward_filter_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor, - input_data, filter_descriptor, *backward_filter_data, - output_descriptor, backward_output_data, - convolution_descriptor, algorithm_config, scratch_allocator, - &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolveBackwardFilter( - this, input_descriptor, input_data, output_descriptor, - backward_output_data, convolution_descriptor, filter_descriptor, - backward_filter_data, algorithm_desc, &scratch_memory, - output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor, + input_data, filter_descriptor, *backward_filter_data, + output_descriptor, backward_output_data, convolution_descriptor, + algorithm_config, scratch_allocator, &algorithm_desc, + &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolveBackwardFilter( + this, input_descriptor, input_data, output_descriptor, + backward_output_data, convolution_descriptor, filter_descriptor, + backward_filter_data, algorithm_desc, &scratch_memory, + output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1201,31 +1149,29 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( PARAM(convolution_descriptor), PARAM(filter_descriptor), PARAM(backward_filter_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - DeviceMemory scratch_memory; - dnn::AlgorithmDesc algorithm_desc; - auto status = - dnn->PrepareForConvolution( - 
dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor, - input_data, filter_descriptor, *backward_filter_data, - output_descriptor, backward_output_data, - convolution_descriptor, algorithm_config, scratch_allocator, - &algorithm_desc, &scratch_memory) - .ok(); - if (status) { - status = dnn->DoConvolveBackwardFilter( - this, input_descriptor, input_data, output_descriptor, - backward_output_data, convolution_descriptor, filter_descriptor, - backward_filter_data, algorithm_desc, &scratch_memory, - output_profile_result); - } - if (!status && !output_profile_result) { - SetError(); - } - } else { - SetErrorAndLogNoDnnSupport(); + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + DeviceMemory scratch_memory; + dnn::AlgorithmDesc algorithm_desc; + auto status = + dnn->PrepareForConvolution( + dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor, + input_data, filter_descriptor, *backward_filter_data, + output_descriptor, backward_output_data, convolution_descriptor, + algorithm_config, scratch_allocator, &algorithm_desc, + &scratch_memory) + .ok(); + if (status) { + status = dnn->DoConvolveBackwardFilter( + this, input_descriptor, input_data, output_descriptor, + backward_output_data, convolution_descriptor, filter_descriptor, + backward_filter_data, algorithm_desc, &scratch_memory, + output_profile_result); } + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1239,14 +1185,12 @@ Stream &Stream::ThenConvolveBackwardBiasImpl( VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor), PARAM(backward_bias_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data, - bias_descriptor, - backward_bias_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data, + bias_descriptor, + backward_bias_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1286,13 +1230,11 @@ Stream &Stream::ThenMatMul(const DeviceMemory &input_data, VLOG_CALL(PARAM(input_data), PARAM(weights), PARAM(input_dimensions), PARAM(output_dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions, - output_dimensions, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions, + output_dimensions, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1307,14 +1249,12 @@ Stream &Stream::ThenMatMulQuantized( PARAM(input_dimensions), PARAM(output_dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoMatMulQuantized(this, input_data, weights, - weight_scales, input_dimensions, - output_dimensions, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMatMulQuantized(this, input_data, weights, weight_scales, + input_dimensions, output_dimensions, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1329,14 +1269,12 @@ Stream &Stream::ThenMatMulQuantized( PARAM(input_dimensions), PARAM(output_dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) 
{ - CheckError(dnn->DoMatMulQuantized(this, input_data, weights, - weight_scales, input_dimensions, - output_dimensions, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMatMulQuantized(this, input_data, weights, weight_scales, + input_dimensions, output_dimensions, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1348,13 +1286,11 @@ Stream &Stream::ThenBiasAdd(const DeviceMemory &input_data, VLOG_CALL(PARAM(input_data), PARAM(biases), PARAM(dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError( - dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError( + dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1369,17 +1305,14 @@ Stream &Stream::ThenPoolForward( PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - workspace_allocator)); - } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + workspace_allocator)); + } else { + SetError(); + LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; } return *this; } @@ -1394,14 +1327,12 @@ Stream &Stream::ThenPoolForward( PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1417,14 +1348,12 @@ Stream &Stream::ThenPoolForward( PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1439,14 +1368,12 @@ Stream &Stream::ThenPoolForward( PARAM(input_data), PARAM(output_dimensions), PARAM(output_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); 
- } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1465,18 +1392,15 @@ Stream &Stream::ThenPoolBackward( PARAM(input_diff_data), PARAM(output_diff_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - input_diff_data, output_diff_data, - workspace_allocator)); - } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + input_diff_data, output_diff_data, + workspace_allocator)); + } else { + SetError(); + LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; } return *this; } @@ -1495,15 +1419,13 @@ Stream &Stream::ThenPoolBackward( PARAM(input_diff_data), PARAM(output_diff_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - input_diff_data, output_diff_data, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + input_diff_data, output_diff_data, + workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1522,15 +1444,13 @@ Stream &Stream::ThenPoolBackward( PARAM(input_diff_data), PARAM(output_diff_data), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, - input_data, output_dimensions, output_data, - input_diff_data, output_diff_data, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions, + input_data, output_dimensions, output_data, + input_diff_data, output_diff_data, + workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1542,13 +1462,11 @@ Stream &Stream::ThenNormalizeWithDimensions( VLOG_CALL(PARAM(normalize_descriptor), PARAM(dimensions), PARAM(input_data), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoNormalizeWithDimensions( - this, normalize_descriptor, dimensions, input_data, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoNormalizeWithDimensions( + this, normalize_descriptor, dimensions, input_data, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1564,15 +1482,13 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions( PARAM(normalized_data), PARAM(normalized_variable_gradient), PARAM(raw_variable_gradient), PARAM(workspace_allocator)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoNormalizeBackwardWithDimensions( - this, 
normalize_descriptor, dimensions, raw_data, normalized_data, - normalized_variable_gradient, raw_variable_gradient, - workspace_allocator)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoNormalizeBackwardWithDimensions( + this, normalize_descriptor, dimensions, raw_data, normalized_data, + normalized_variable_gradient, raw_variable_gradient, + workspace_allocator)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1593,13 +1509,11 @@ Stream &Stream::ThenActivateWithOptions(dnn::ActivationMode activation_mode, VLOG_CALL(PARAM(activation_mode), PARAM(dimensions), PARAM(input_data), PARAM(output_data), PARAM(options)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data, - output_data, options)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data, + output_data, options)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1623,13 +1537,11 @@ Stream &Stream::ThenDepthConcatenate( } } - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data, - output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1670,13 +1582,11 @@ Stream &Stream::ThenSpaceConcatenate( return *this; } } - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoSpaceConcatenate(this, input_dimensions, input_data, - output_data, concat_direction)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoSpaceConcatenate(this, input_dimensions, input_data, + output_data, concat_direction)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1688,13 +1598,11 @@ Stream &Stream::ThenReshape(const dnn::BatchDescriptor &input_dimensions, VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoReshape(this, input_dimensions, input_data, - output_dimensions, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoReshape(this, input_dimensions, input_data, + output_dimensions, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1708,14 +1616,12 @@ Stream &Stream::ThenDepthToSpace( PARAM(depth_to_space_layout), PARAM(sqrt_depth_reduction), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoDepthToSpace(this, input_dimensions, input_data, - depth_to_space_layout, - sqrt_depth_reduction, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoDepthToSpace(this, input_dimensions, input_data, + depth_to_space_layout, sqrt_depth_reduction, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1729,14 +1635,12 @@ Stream &Stream::ThenSpaceToDepth( PARAM(space_to_depth_layout), PARAM(sqrt_depth_increase), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = 
parent_->AsDnn()) { - CheckError(dnn->DoSpaceToDepth(this, input_dimensions, input_data, - space_to_depth_layout, sqrt_depth_increase, - output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoSpaceToDepth(this, input_dimensions, input_data, + space_to_depth_layout, sqrt_depth_increase, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1750,14 +1654,12 @@ Stream &Stream::ThenElementwiseOperate( VLOG_CALL(PARAM(operation), PARAM(input_dimensions), PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoElementwiseOperate(this, operation, input_dimensions, - input_data, output_dimensions, - output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoElementwiseOperate(this, operation, input_dimensions, + input_data, output_dimensions, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1773,14 +1675,12 @@ Stream &Stream::ThenElementwiseOperateScaledQuantized( PARAM(input_dimensions), PARAM(input_data), PARAM(output_dimensions), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoElementwiseOperateScaledQuantized( - this, operation, input_multiplicands, output_divisor, - input_dimensions, input_data, output_dimensions, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoElementwiseOperateScaledQuantized( + this, operation, input_multiplicands, output_divisor, input_dimensions, + input_data, output_dimensions, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1793,13 +1693,11 @@ Stream &Stream::ThenXYPad(const dnn::BatchDescriptor &dimensions, PARAM(right_pad), PARAM(top_pad), PARAM(bottom_pad), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad, - top_pad, bottom_pad, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad, + top_pad, bottom_pad, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1813,14 +1711,11 @@ Stream &Stream::ThenXYSlice(const dnn::BatchDescriptor &dimensions, PARAM(right_trim), PARAM(top_trim), PARAM(bottom_trim), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoXYSlice(this, dimensions, input_data, left_trim, - right_trim, top_trim, bottom_trim, - output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoXYSlice(this, dimensions, input_data, left_trim, + right_trim, top_trim, bottom_trim, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1832,13 +1727,11 @@ Stream &Stream::ThenXYBroadcast(const dnn::BatchDescriptor &dimensions, VLOG_CALL(PARAM(dimensions), PARAM(input_data), PARAM(replicate_x), PARAM(replicate_y), PARAM(output_data)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoXYBroadcast(this, dimensions, input_data, replicate_x, - replicate_y, output_data)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + 
CheckError(dnn->DoXYBroadcast(this, dimensions, input_data, replicate_x, + replicate_y, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1849,13 +1742,11 @@ Stream &Stream::ThenMemcpyD2HQuantized( VLOG_CALL(PARAM(gpu_unquantized_src), PARAM(mode), PARAM(host_dst), PARAM(size)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode, - host_dst, size)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode, + host_dst, size)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1866,13 +1757,11 @@ Stream &Stream::ThenMemcpyH2DQuantized( VLOG_CALL(PARAM(host_src), PARAM(size), PARAM(mode), PARAM(gpu_unquantized_dst)); - if (ok()) { - if (dnn::DnnSupport *dnn = parent_->AsDnn()) { - CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode, - gpu_unquantized_dst)); - } else { - SetErrorAndLogNoDnnSupport(); - } + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode, + gpu_unquantized_dst)); + } else { + SetErrorAndLogNoDnnSupport(); } return *this; } @@ -1920,7 +1809,7 @@ Stream *Stream::GetOrCreateSubStream() { false); Stream *sub_stream = sub_streams_.back().first.get(); sub_stream->Init(); - if (!sub_stream->ok_) { + if (!sub_stream->ok()) { LOG(ERROR) << "sub-stream failed to be initialized"; } VLOG(1) << DebugStreamPointers() << " created new sub_stream " @@ -1972,24 +1861,14 @@ void Stream::ReturnSubStream(Stream *sub_stream) { Stream &Stream::ThenStartTimer(Timer *t) { VLOG_CALL(PARAM(t)); - if (ok()) { - CheckError(parent_->StartTimer(this, t)); - } else { - LOG(INFO) << DebugStreamPointers() - << " did not enqueue 'start timer': " << t; - } + CheckError(parent_->StartTimer(this, t)); return *this; } Stream &Stream::ThenStopTimer(Timer *t) { VLOG_CALL(PARAM(t)); - if (ok()) { - CheckError(parent_->StopTimer(this, t)); - } else { - LOG(INFO) << DebugStreamPointers() - << " did not enqueue 'stop timer': " << t; - } + CheckError(parent_->StopTimer(this, t)); return *this; } @@ -2079,7 +1958,8 @@ Stream &Stream::ThenBlasAsum(uint64 elem_count, const DeviceMemory &x, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, result); } @@ -2090,7 +1970,8 @@ Stream &Stream::ThenBlasAsum(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, result); } @@ -2101,7 +1982,8 @@ Stream &Stream::ThenBlasAsum(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx, result); } @@ -2113,7 +1995,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, float alpha, PARAM(incy)); ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, y, incy); } @@ -2125,7 +2008,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, double alpha, PARAM(incy)); ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + 
impl; return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, y, incy); } @@ -2139,7 +2023,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, std::complex alpha, ThenBlasImpl, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, y, incy); } @@ -2153,7 +2038,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, std::complex alpha, ThenBlasImpl, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx, y, incy); } @@ -2163,7 +2049,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count, const DeviceMemory &x, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); ThenBlasImpl &, int, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, incy); } @@ -2173,7 +2060,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count, const DeviceMemory &x, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, incy); } @@ -2185,7 +2073,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, incy); } @@ -2197,7 +2086,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y, incy); } @@ -2209,7 +2099,8 @@ Stream &Stream::ThenBlasDot(uint64 elem_count, const DeviceMemory &x, PARAM(result)); ThenBlasImpl &, int, - const DeviceMemory &, int, DeviceMemory *> impl; + const DeviceMemory &, int, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasDot, elem_count, x, incx, y, incy, result); } @@ -2221,7 +2112,8 @@ Stream &Stream::ThenBlasDot(uint64 elem_count, const DeviceMemory &x, PARAM(result)); ThenBlasImpl &, int, - const DeviceMemory &, int, DeviceMemory *> impl; + const DeviceMemory &, int, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasDot, elem_count, x, incx, y, incy, result); } @@ -2237,7 +2129,8 @@ Stream &Stream::ThenBlasDotc(uint64 elem_count, ThenBlasImpl> &, int, const DeviceMemory> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasDotc, elem_count, x, incx, y, incy, result); } @@ -2253,7 +2146,8 @@ Stream &Stream::ThenBlasDotc(uint64 elem_count, ThenBlasImpl> &, int, const DeviceMemory> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasDotc, elem_count, x, incx, y, incy, result); } @@ -2269,7 +2163,8 @@ Stream &Stream::ThenBlasDotu(uint64 elem_count, ThenBlasImpl> &, int, const DeviceMemory> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasDotu, elem_count, x, incx, y, incy, result); } @@ -2285,7 +2180,8 @@ Stream &Stream::ThenBlasDotu(uint64 elem_count, ThenBlasImpl> &, int, const DeviceMemory> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return 
impl(this, &blas::BlasSupport::DoBlasDotu, elem_count, x, incx, y, incy, result); } @@ -2305,7 +2201,8 @@ Stream &Stream::ThenBlasNrm2(uint64 elem_count, const DeviceMemory &x, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, result); } @@ -2316,7 +2213,8 @@ Stream &Stream::ThenBlasNrm2(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, result); } @@ -2327,7 +2225,8 @@ Stream &Stream::ThenBlasNrm2(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx, result); } @@ -2339,7 +2238,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count, DeviceMemory *x, int incx, PARAM(c), PARAM(s)); ThenBlasImpl *, int, DeviceMemory *, int, - float, float> impl; + float, float> + impl; return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, c, s); } @@ -2351,7 +2251,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count, DeviceMemory *x, PARAM(c), PARAM(s)); ThenBlasImpl *, int, DeviceMemory *, int, - double, double> impl; + double, double> + impl; return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, c, s); } @@ -2364,7 +2265,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count, PARAM(c), PARAM(s)); ThenBlasImpl> *, int, - DeviceMemory> *, int, float, float> impl; + DeviceMemory> *, int, float, float> + impl; return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, c, s); } @@ -2377,7 +2279,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count, PARAM(c), PARAM(s)); ThenBlasImpl> *, int, - DeviceMemory> *, int, double, double> impl; + DeviceMemory> *, int, double, double> + impl; return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy, c, s); } @@ -2387,7 +2290,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory *a, DeviceMemory *b, VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s)); ThenBlasImpl *, DeviceMemory *, - DeviceMemory *, DeviceMemory *> impl; + DeviceMemory *, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } @@ -2396,7 +2300,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory *a, DeviceMemory *b, VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s)); ThenBlasImpl *, DeviceMemory *, - DeviceMemory *, DeviceMemory *> impl; + DeviceMemory *, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } @@ -2408,7 +2313,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory> *a, ThenBlasImpl> *, DeviceMemory> *, DeviceMemory *, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } @@ -2420,7 +2326,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory> *a, ThenBlasImpl> *, DeviceMemory> *, DeviceMemory *, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s); } @@ -2431,7 +2338,8 @@ Stream &Stream::ThenBlasRotm(uint64 elem_count, DeviceMemory *x, PARAM(param)); ThenBlasImpl *, int, DeviceMemory *, int, - const DeviceMemory &> impl; + const DeviceMemory &> + impl; return impl(this, &blas::BlasSupport::DoBlasRotm, elem_count, x, incx, y, incy, param); } @@ -2443,7 
+2351,8 @@ Stream &Stream::ThenBlasRotm(uint64 elem_count, DeviceMemory *x, PARAM(param)); ThenBlasImpl *, int, DeviceMemory *, int, - const DeviceMemory &> impl; + const DeviceMemory &> + impl; return impl(this, &blas::BlasSupport::DoBlasRotm, elem_count, x, incx, y, incy, param); } @@ -2456,7 +2365,8 @@ Stream &Stream::ThenBlasRotmg(DeviceMemory *d1, DeviceMemory *d2, ThenBlasImpl *, DeviceMemory *, DeviceMemory *, const DeviceMemory &, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasRotmg, d1, d2, x1, y1, param); } @@ -2469,7 +2379,8 @@ Stream &Stream::ThenBlasRotmg(DeviceMemory *d1, ThenBlasImpl *, DeviceMemory *, DeviceMemory *, const DeviceMemory &, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasRotmg, d1, d2, x1, y1, param); } @@ -2510,7 +2421,8 @@ Stream &Stream::ThenBlasScal(uint64 elem_count, std::complex alpha, VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); ThenBlasImpl, DeviceMemory> *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } @@ -2519,7 +2431,8 @@ Stream &Stream::ThenBlasScal(uint64 elem_count, std::complex alpha, VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx)); ThenBlasImpl, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx); } @@ -2549,7 +2462,8 @@ Stream &Stream::ThenBlasSwap(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); ThenBlasImpl> *, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y, incy); } @@ -2560,7 +2474,8 @@ Stream &Stream::ThenBlasSwap(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy)); ThenBlasImpl> *, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y, incy); } @@ -2591,7 +2506,8 @@ Stream &Stream::ThenBlasIamax(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx, result); } @@ -2602,7 +2518,8 @@ Stream &Stream::ThenBlasIamax(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx, result); } @@ -2633,7 +2550,8 @@ Stream &Stream::ThenBlasIamin(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx, result); } @@ -2644,7 +2562,8 @@ Stream &Stream::ThenBlasIamin(uint64 elem_count, VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result)); ThenBlasImpl> &, int, - DeviceMemory *> impl; + DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx, result); } @@ -2660,7 +2579,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl &, int, const DeviceMemory &, - int, float, DeviceMemory *, int> impl; + int, float, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, a, 
lda, x, incx, beta, y, incy); } @@ -2676,7 +2596,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl &, int, const DeviceMemory &, - int, double, DeviceMemory *, int> impl; + int, double, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } @@ -2695,8 +2616,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } @@ -2715,8 +2636,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } @@ -2731,7 +2652,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl &, int, const DeviceMemory &, - int, float, DeviceMemory *, int> impl; + int, float, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -2746,7 +2668,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl &, int, const DeviceMemory &, - int, double, DeviceMemory *, int> impl; + int, double, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -2765,8 +2688,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -2785,8 +2708,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -2799,8 +2722,8 @@ Stream &Stream::ThenBlasGer(uint64 m, uint64 n, float alpha, PARAM(incy), PARAM(a), PARAM(lda)); ThenBlasImpl &, int, - const DeviceMemory &, int, DeviceMemory *, - int> impl; + const DeviceMemory &, int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGer, m, n, alpha, x, incx, y, incy, a, lda); } @@ -2813,8 +2736,8 @@ Stream &Stream::ThenBlasGer(uint64 m, uint64 n, double alpha, PARAM(incy), PARAM(a), PARAM(lda)); ThenBlasImpl &, int, - const DeviceMemory &, int, DeviceMemory *, - int> impl; + const DeviceMemory &, int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGer, m, n, alpha, x, incx, y, incy, a, lda); } @@ -2831,7 +2754,8 @@ Stream &Stream::ThenBlasGerc(uint64 m, uint64 n, std::complex alpha, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGerc, m, n, alpha, x, incx, y, incy, a, lda); } @@ -2848,7 +2772,8 @@ Stream &Stream::ThenBlasGerc(uint64 m, 
uint64 n, std::complex alpha, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGerc, m, n, alpha, x, incx, y, incy, a, lda); } @@ -2865,7 +2790,8 @@ Stream &Stream::ThenBlasGeru(uint64 m, uint64 n, std::complex alpha, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGeru, m, n, alpha, x, incx, y, incy, a, lda); } @@ -2882,7 +2808,8 @@ Stream &Stream::ThenBlasGeru(uint64 m, uint64 n, std::complex alpha, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGeru, m, n, alpha, x, incx, y, incy, a, lda); } @@ -2900,8 +2827,8 @@ Stream &Stream::ThenBlasHbmv(blas::UpperLower uplo, uint64 n, uint64 k, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHbmv, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } @@ -2919,8 +2846,8 @@ Stream &Stream::ThenBlasHbmv(blas::UpperLower uplo, uint64 n, uint64 k, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHbmv, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } @@ -2938,8 +2865,8 @@ Stream &Stream::ThenBlasHemv(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHemv, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -2957,8 +2884,8 @@ Stream &Stream::ThenBlasHemv(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHemv, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -2972,7 +2899,8 @@ Stream &Stream::ThenBlasHer(blas::UpperLower uplo, uint64 n, float alpha, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHer, uplo, n, alpha, x, incx, a, lda); } @@ -2986,7 +2914,8 @@ Stream &Stream::ThenBlasHer(blas::UpperLower uplo, uint64 n, double alpha, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHer, uplo, n, alpha, x, incx, a, lda); } @@ -3004,7 +2933,8 @@ Stream &Stream::ThenBlasHer2(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHer2, uplo, n, alpha, x, incx, y, incy, a, lda); } @@ -3022,7 +2952,8 @@ Stream &Stream::ThenBlasHer2(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHer2, uplo, n, alpha, x, incx, y, incy, a, lda); } @@ -3039,8 +2970,8 @@ Stream &Stream::ThenBlasHpmv(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const 
DeviceMemory> &, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHpmv, uplo, n, alpha, ap, x, incx, beta, y, incy); } @@ -3057,8 +2988,8 @@ Stream &Stream::ThenBlasHpmv(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHpmv, uplo, n, alpha, ap, x, incx, beta, y, incy); } @@ -3071,7 +3002,8 @@ Stream &Stream::ThenBlasHpr(blas::UpperLower uplo, uint64 n, float alpha, ThenBlasImpl> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasHpr, uplo, n, alpha, x, incx, ap); } @@ -3083,7 +3015,8 @@ Stream &Stream::ThenBlasHpr(blas::UpperLower uplo, uint64 n, double alpha, ThenBlasImpl> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasHpr, uplo, n, alpha, x, incx, ap); } @@ -3099,7 +3032,8 @@ Stream &Stream::ThenBlasHpr2(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasHpr2, uplo, n, alpha, x, incx, y, incy, ap); } @@ -3116,7 +3050,8 @@ Stream &Stream::ThenBlasHpr2(blas::UpperLower uplo, uint64 n, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - DeviceMemory> *> impl; + DeviceMemory> *> + impl; return impl(this, &blas::BlasSupport::DoBlasHpr2, uplo, n, alpha, x, incx, y, incy, ap); } @@ -3130,7 +3065,8 @@ Stream &Stream::ThenBlasSbmv(blas::UpperLower uplo, uint64 n, uint64 k, ThenBlasImpl &, int, const DeviceMemory &, - int, float, DeviceMemory *, int> impl; + int, float, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSbmv, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } @@ -3144,7 +3080,8 @@ Stream &Stream::ThenBlasSbmv(blas::UpperLower uplo, uint64 n, uint64 k, ThenBlasImpl &, int, const DeviceMemory &, - int, double, DeviceMemory *, int> impl; + int, double, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSbmv, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } @@ -3158,7 +3095,8 @@ Stream &Stream::ThenBlasSpmv(blas::UpperLower uplo, uint64 n, float alpha, ThenBlasImpl &, const DeviceMemory &, int, float, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasSpmv, uplo, n, alpha, ap, x, incx, beta, y, incy); } @@ -3172,7 +3110,8 @@ Stream &Stream::ThenBlasSpmv(blas::UpperLower uplo, uint64 n, double alpha, ThenBlasImpl &, const DeviceMemory &, int, double, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSpmv, uplo, n, alpha, ap, x, incx, beta, y, incy); } @@ -3184,7 +3123,8 @@ Stream &Stream::ThenBlasSpr(blas::UpperLower uplo, uint64 n, float alpha, PARAM(ap)); ThenBlasImpl &, - int, DeviceMemory *> impl; + int, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasSpr, uplo, n, alpha, x, incx, ap); } @@ -3195,7 +3135,8 @@ Stream &Stream::ThenBlasSpr(blas::UpperLower uplo, uint64 n, double alpha, PARAM(ap)); ThenBlasImpl &, - int, DeviceMemory *> impl; + int, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasSpr, uplo, n, alpha, x, incx, ap); } @@ -3207,8 +3148,8 @@ Stream &Stream::ThenBlasSpr2(blas::UpperLower uplo, uint64 n, float alpha, PARAM(y), 
PARAM(incy), PARAM(ap)); ThenBlasImpl &, - int, const DeviceMemory &, int, - DeviceMemory *> impl; + int, const DeviceMemory &, int, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasSpr2, uplo, n, alpha, x, incx, y, incy, ap); } @@ -3221,8 +3162,8 @@ Stream &Stream::ThenBlasSpr2(blas::UpperLower uplo, uint64 n, double alpha, PARAM(y), PARAM(incy), PARAM(ap)); ThenBlasImpl &, - int, const DeviceMemory &, int, - DeviceMemory *> impl; + int, const DeviceMemory &, int, DeviceMemory *> + impl; return impl(this, &blas::BlasSupport::DoBlasSpr2, uplo, n, alpha, x, incx, y, incy, ap); } @@ -3236,7 +3177,8 @@ Stream &Stream::ThenBlasSymv(blas::UpperLower uplo, uint64 n, float alpha, ThenBlasImpl &, int, const DeviceMemory &, int, float, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSymv, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -3250,7 +3192,8 @@ Stream &Stream::ThenBlasSymv(blas::UpperLower uplo, uint64 n, double alpha, ThenBlasImpl &, int, const DeviceMemory &, int, double, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSymv, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } @@ -3262,7 +3205,8 @@ Stream &Stream::ThenBlasSyr(blas::UpperLower uplo, uint64 n, float alpha, PARAM(a), PARAM(lda)); ThenBlasImpl &, - int, DeviceMemory *, int> impl; + int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr, uplo, n, alpha, x, incx, a, lda); } @@ -3274,7 +3218,8 @@ Stream &Stream::ThenBlasSyr(blas::UpperLower uplo, uint64 n, double alpha, PARAM(a), PARAM(lda)); ThenBlasImpl &, - int, DeviceMemory *, int> impl; + int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr, uplo, n, alpha, x, incx, a, lda); } @@ -3288,7 +3233,8 @@ Stream &Stream::ThenBlasSyr2(blas::UpperLower uplo, uint64 n, float alpha, ThenBlasImpl &, int, const DeviceMemory &, int, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr2, uplo, n, alpha, x, incx, y, incy, a, lda); } @@ -3302,7 +3248,8 @@ Stream &Stream::ThenBlasSyr2(blas::UpperLower uplo, uint64 n, double alpha, ThenBlasImpl &, int, const DeviceMemory &, int, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr2, uplo, n, alpha, x, incx, y, incy, a, lda); } @@ -3316,7 +3263,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3330,7 +3278,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3345,7 +3294,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3360,7 +3310,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3374,7 +3325,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose 
trans, ThenBlasImpl &, int, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3388,7 +3340,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3403,7 +3356,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3418,7 +3372,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a, lda, x, incx); } @@ -3431,7 +3386,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, PARAM(x), PARAM(incx)); ThenBlasImpl &, DeviceMemory *, int> impl; + const DeviceMemory &, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, incx); } @@ -3444,7 +3400,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, PARAM(x), PARAM(incx)); ThenBlasImpl &, DeviceMemory *, int> impl; + const DeviceMemory &, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, incx); } @@ -3458,7 +3415,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, incx); } @@ -3472,7 +3430,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x, incx); } @@ -3485,7 +3444,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, PARAM(x), PARAM(incx)); ThenBlasImpl &, DeviceMemory *, int> impl; + const DeviceMemory &, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, incx); } @@ -3498,7 +3458,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, PARAM(x), PARAM(incx)); ThenBlasImpl &, DeviceMemory *, int> impl; + const DeviceMemory &, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, incx); } @@ -3512,7 +3473,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, incx); } @@ -3526,7 +3488,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x, incx); } @@ -3539,8 +3502,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, PARAM(lda), PARAM(x), PARAM(incx)); ThenBlasImpl &, int, DeviceMemory *, - int> impl; + const DeviceMemory &, int, DeviceMemory *, int> + impl; return impl(this, 
&blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3553,8 +3516,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, PARAM(lda), PARAM(x), PARAM(incx)); ThenBlasImpl &, int, DeviceMemory *, - int> impl; + const DeviceMemory &, int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3569,7 +3532,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3584,7 +3548,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3597,8 +3562,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, PARAM(lda), PARAM(x), PARAM(incx)); ThenBlasImpl &, int, DeviceMemory *, - int> impl; + const DeviceMemory &, int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3611,8 +3576,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, PARAM(lda), PARAM(x), PARAM(incx)); ThenBlasImpl &, int, DeviceMemory *, - int> impl; + const DeviceMemory &, int, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3627,7 +3592,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3642,7 +3608,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a, lda, x, incx); } @@ -3651,16 +3618,17 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, uint64 m, uint64 n, uint64 k, float alpha, const DeviceMemory &a, int lda, const DeviceMemory &b, int ldb, - float beta, - DeviceMemory *c, int ldc) { + float beta, DeviceMemory *c, + int ldc) { VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k), PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb), PARAM(beta), PARAM(c), PARAM(ldc)); ThenBlasImpl &, int, - const DeviceMemory &, int, - float, DeviceMemory *, int> impl; + const DeviceMemory &, int, float, + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -3676,7 +3644,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, ThenBlasImpl &, int, const DeviceMemory &, - int, float, DeviceMemory *, int> impl; + int, float, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -3692,7 +3661,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, ThenBlasImpl &, int, const DeviceMemory &, - int, double, DeviceMemory *, int> impl; + int, double, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, alpha, a, lda, b, 
ldb, beta, c, ldc); } @@ -3712,8 +3682,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -3733,8 +3703,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4100,8 +4070,8 @@ Stream &Stream::ThenBlasHemm(blas::Side side, blas::UpperLower uplo, uint64 m, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHemm, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4120,8 +4090,8 @@ Stream &Stream::ThenBlasHemm(blas::Side side, blas::UpperLower uplo, uint64 m, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHemm, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4136,7 +4106,8 @@ Stream &Stream::ThenBlasHerk(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, float, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHerk, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } @@ -4151,7 +4122,8 @@ Stream &Stream::ThenBlasHerk(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl> &, int, double, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHerk, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } @@ -4170,7 +4142,8 @@ Stream &Stream::ThenBlasHer2k(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, float, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHer2k, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4189,7 +4162,8 @@ Stream &Stream::ThenBlasHer2k(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, double, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasHer2k, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4205,7 +4179,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, ThenBlasImpl &, int, const DeviceMemory &, - int, float, DeviceMemory *, int> impl; + int, float, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4221,7 +4196,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, ThenBlasImpl &, int, const DeviceMemory &, - int, double, DeviceMemory *, int> impl; + int, double, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4240,8 +4216,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, 
uint64 m, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4260,8 +4236,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4275,7 +4251,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, float, DeviceMemory *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } @@ -4289,7 +4266,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, double, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } @@ -4305,7 +4283,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl, const DeviceMemory> &, int, std::complex, DeviceMemory> *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } @@ -4321,7 +4300,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl, const DeviceMemory> &, int, std::complex, DeviceMemory> *, - int> impl; + int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } @@ -4337,7 +4317,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, const DeviceMemory &, - int, float, DeviceMemory *, int> impl; + int, float, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4353,7 +4334,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl &, int, const DeviceMemory &, - int, double, DeviceMemory *, int> impl; + int, double, DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4372,8 +4354,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4392,8 +4374,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans, ThenBlasImpl, const DeviceMemory> &, int, const DeviceMemory> &, int, - std::complex, DeviceMemory> *, - int> impl; + std::complex, DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } @@ -4408,7 +4390,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4423,7 +4406,8 @@ Stream 
&Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4440,7 +4424,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4457,7 +4442,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4472,7 +4458,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4487,7 +4474,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl &, int, - DeviceMemory *, int> impl; + DeviceMemory *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4504,7 +4492,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4521,7 +4510,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo, ThenBlasImpl, const DeviceMemory> &, int, - DeviceMemory> *, int> impl; + DeviceMemory> *, int> + impl; return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -4814,17 +4804,11 @@ Stream &Stream::ThenBlasGemmStridedBatched( Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) { VLOG_CALL(PARAM(seed), PARAM(seed_bytes)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->SetSeed(this, seed, seed_bytes)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() << " unable to initialize RNG"; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->SetSeed(this, seed, seed_bytes)); } else { - LOG(INFO) << DebugStreamPointers() - << " did not set RNG seed: " << static_cast(seed) - << "; bytes: " << seed_bytes; + SetError(); + LOG(INFO) << DebugStreamPointers() << " unable to initialize RNG"; } return *this; } @@ -4832,15 +4816,13 @@ Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) { Stream &Stream::ThenPopulateRandUniform(DeviceMemory *values) { VLOG_CALL(PARAM(values)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->DoPopulateRandUniform(this, values)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() - << " attempting to perform RNG operation using StreamExecutor" - " without RNG support."; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << DebugStreamPointers() + << " attempting to perform RNG operation using StreamExecutor" + " without RNG support."; } return *this; } @@ -4849,15 +4831,13 @@ Stream 
&Stream::ThenPopulateRandGaussian(float mean, float sd, DeviceMemory *values) { VLOG_CALL(PARAM(mean), PARAM(sd), PARAM(values)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() - << " attempting to perform RNG operation using StreamExecutor" - " without RNG support."; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values)); + } else { + SetError(); + LOG(INFO) << DebugStreamPointers() + << " attempting to perform RNG operation using StreamExecutor" + " without RNG support."; } return *this; } @@ -4866,15 +4846,13 @@ Stream &Stream::ThenPopulateRandGaussian(double mean, double sd, DeviceMemory *values) { VLOG_CALL(PARAM(mean), PARAM(sd), PARAM(values)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() - << " attempting to perform RNG operation using StreamExecutor" - " without RNG support."; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values)); + } else { + SetError(); + LOG(INFO) << DebugStreamPointers() + << " attempting to perform RNG operation using StreamExecutor" + " without RNG support."; } return *this; } @@ -4882,15 +4860,13 @@ Stream &Stream::ThenPopulateRandGaussian(double mean, double sd, Stream &Stream::ThenPopulateRandUniform(DeviceMemory *values) { VLOG_CALL(PARAM(values)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->DoPopulateRandUniform(this, values)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() - << " attempting to perform RNG operation using StreamExecutor" - " without RNG support."; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << DebugStreamPointers() + << " attempting to perform RNG operation using StreamExecutor" + " without RNG support."; } return *this; } @@ -4899,15 +4875,13 @@ Stream &Stream::ThenPopulateRandUniform( DeviceMemory> *values) { VLOG_CALL(PARAM(values)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->DoPopulateRandUniform(this, values)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() - << " attempting to perform RNG operation using StreamExecutor" - " without RNG support."; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << DebugStreamPointers() + << " attempting to perform RNG operation using StreamExecutor" + " without RNG support."; } return *this; } @@ -4916,15 +4890,13 @@ Stream &Stream::ThenPopulateRandUniform( DeviceMemory> *values) { VLOG_CALL(PARAM(values)); - if (ok()) { - if (rng::RngSupport *rng = parent_->AsRng()) { - CheckError(rng->DoPopulateRandUniform(this, values)); - } else { - SetError(); - LOG(INFO) << DebugStreamPointers() - << " attempting to perform RNG operation using StreamExecutor" - " without RNG support."; - } + if (rng::RngSupport *rng = parent_->AsRng()) { + CheckError(rng->DoPopulateRandUniform(this, values)); + } else { + SetError(); + LOG(INFO) << DebugStreamPointers() + << " attempting to perform RNG operation using StreamExecutor" + " without RNG support."; } return *this; } @@ -4933,12 +4905,7 @@ Stream 
                            uint64 size) {
   VLOG_CALL(PARAM(host_dst), PARAM(gpu_src), PARAM(size));
 
-  if (ok()) {
-    CheckError(parent_->Memcpy(this, host_dst, gpu_src, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memcpy device-to-host; source: " << gpu_src.opaque();
-  }
+  CheckError(parent_->Memcpy(this, host_dst, gpu_src, size));
   return *this;
 }
 
@@ -4946,12 +4913,7 @@ Stream &Stream::ThenMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src,
                            uint64 size) {
   VLOG_CALL(PARAM(gpu_dst), PARAM(host_src), PARAM(size));
 
-  if (ok()) {
-    CheckError(parent_->Memcpy(this, gpu_dst, host_src, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memcpy host-to-device; source: " << host_src;
-  }
+  CheckError(parent_->Memcpy(this, gpu_dst, host_src, size));
   return *this;
 }
 
@@ -4959,24 +4921,14 @@ Stream &Stream::ThenMemcpy(DeviceMemoryBase *gpu_dst,
                           const DeviceMemoryBase &gpu_src, uint64 size) {
   VLOG_CALL(PARAM(gpu_dst), PARAM(gpu_src), PARAM(size));
 
-  if (ok()) {
-    CheckError(parent_->MemcpyDeviceToDevice(this, gpu_dst, gpu_src, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memcpy gpu-to-gpu; source: " << &gpu_src;
-  }
+  CheckError(parent_->MemcpyDeviceToDevice(this, gpu_dst, gpu_src, size));
   return *this;
 }
 
 Stream &Stream::ThenMemZero(DeviceMemoryBase *location, uint64 size) {
   VLOG_CALL(PARAM(location), PARAM(size));
 
-  if (ok()) {
-    CheckStatus(parent_->MemZero(this, location, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memzero GPU location; source: " << location;
-  }
+  CheckStatus(parent_->MemZero(this, location, size));
   return *this;
 }
 
@@ -4984,13 +4936,7 @@ Stream &Stream::ThenMemset32(DeviceMemoryBase *location, uint32 pattern,
                             uint64 size) {
   VLOG_CALL(PARAM(location), PARAM(pattern), PARAM(size));
 
-  if (ok()) {
-    CheckStatus(parent_->Memset32(this, location, pattern, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memset GPU location; source: " << location
-              << "; size: " << size << "; pattern: " << std::hex << pattern;
-  }
+  CheckStatus(parent_->Memset32(this, location, pattern, size));
   return *this;
 }
 
@@ -5013,20 +4959,17 @@ Stream &Stream::ThenRnnForward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnForward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          is_training, reserve_space_allocator, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnForward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data, is_training,
+        reserve_space_allocator, workspace_allocator, output_profile_result);
+    if (!status && !output_profile_result) {
+      SetError();
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5049,20 +4992,17 @@ Stream &Stream::ThenRnnForward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnForward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          is_training, reserve_space_allocator, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnForward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data, is_training,
+        reserve_space_allocator, workspace_allocator, output_profile_result);
+    if (!status && !output_profile_result) {
+      SetError();
    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5086,20 +5026,17 @@ Stream &Stream::ThenRnnForward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnForward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          is_training, reserve_space_allocator, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnForward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data, is_training,
+        reserve_space_allocator, workspace_allocator, output_profile_result);
+    if (!status && !output_profile_result) {
+      SetError();
    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5130,23 +5067,21 @@ Stream &Stream::ThenRnnBackward(
     ScratchAllocator *workspace_allocator,
    dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnBackward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          output_backprop_data, output_h_backprop_data, output_c_backprop_data,
-          input_backprop_data, input_h_backprop_data, input_c_backprop_data,
-          params_backprop_data, reserve_space_data, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnBackward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data,
+        output_backprop_data, output_h_backprop_data, output_c_backprop_data,
+        input_backprop_data, input_h_backprop_data, input_c_backprop_data,
+        params_backprop_data, reserve_space_data, workspace_allocator,
+        output_profile_result);
+    if (!status && !output_profile_result) {
       SetError();
-      LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
     }
+  } else {
+    SetError();
+    LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
   }
   return *this;
 }
@@ -5176,23 +5111,21 @@ Stream &Stream::ThenRnnBackward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnBackward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          output_backprop_data, output_h_backprop_data, output_c_backprop_data,
-          input_backprop_data, input_h_backprop_data, input_c_backprop_data,
-          params_backprop_data, reserve_space_data, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnBackward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data,
+        output_backprop_data, output_h_backprop_data, output_c_backprop_data,
+        input_backprop_data, input_h_backprop_data, input_c_backprop_data,
+        params_backprop_data, reserve_space_data, workspace_allocator,
+        output_profile_result);
+    if (!status && !output_profile_result) {
       SetError();
-      LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
     }
+  } else {
+    SetError();
+    LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
   }
   return *this;
 }
@@ -5223,23 +5156,21 @@ Stream &Stream::ThenRnnBackward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnBackward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          output_backprop_data, output_h_backprop_data, output_c_backprop_data,
-          input_backprop_data, input_h_backprop_data, input_c_backprop_data,
-          params_backprop_data, reserve_space_data, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnBackward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data,
+        output_backprop_data, output_h_backprop_data, output_c_backprop_data,
+        input_backprop_data, input_h_backprop_data, input_c_backprop_data,
+        params_backprop_data, reserve_space_data, workspace_allocator,
+        output_profile_result);
+    if (!status && !output_profile_result) {
      SetError();
-      LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
     }
+  } else {
+    SetError();
+    LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
   }
   return *this;
 }
@@ -5253,28 +5184,26 @@ Stream &Stream::ThenCtcLoss(const dnn::RnnStateTensorDescriptor &probs_desc,
                             const dnn::RnnStateTensorDescriptor &grads_desc,
                             DeviceMemory<float> *grads_data,
                             ScratchAllocator *workspace_allocator) {
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      int ctc_loss_algo_id;
-      auto status =
-          dnn->PrepareForCtcLoss(this, probs_desc, probs_data, grads_desc,
-                                 labels_data, labels_lengths_data,
-                                 input_lengths_data, workspace_allocator,
-                                 &scratch_memory, &ctc_loss_algo_id)
-              .ok();
-      if (status) {
-        status = dnn->DoCtcLoss(this, probs_desc, probs_data, labels_data,
-                                labels_lengths_data, input_lengths_data,
-                                costs_data, grads_desc, grads_data,
-                                &scratch_memory, ctc_loss_algo_id);
-      }
-      if (!status) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    int ctc_loss_algo_id;
+    auto status =
+        dnn->PrepareForCtcLoss(this, probs_desc, probs_data, grads_desc,
+                               labels_data, labels_lengths_data,
+                               input_lengths_data, workspace_allocator,
+                               &scratch_memory, &ctc_loss_algo_id)
+            .ok();
+    if (status) {
+      status = dnn->DoCtcLoss(this, probs_desc, probs_data, labels_data,
+                              labels_lengths_data, input_lengths_data,
+                              costs_data, grads_desc, grads_data,
+                              &scratch_memory, ctc_loss_algo_id);
    }
+    if (!status) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5288,14 +5217,12 @@ Stream &Stream::ThenTransformTensor(const dnn::BatchDescriptor &input_desc,
   VLOG_CALL(PARAM(input_desc), PARAM(input_type), PARAM(input_data),
             PARAM(output_desc), PARAM(output_type), PARAM(scale),
             PARAM(output_data));
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoTransformTensor(this, input_desc, input_type,
-                                        input_data, output_desc, output_type,
-                                        scale, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoTransformTensor(this, input_desc, input_type, input_data,
+                                      output_desc, output_type, scale,
+                                      output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5342,15 +5269,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<std::complex<float>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5360,15 +5285,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<std::complex<double>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5377,15 +5300,13 @@ Stream &Stream::ThenFft(fft::Plan *plan, const DeviceMemory<float> &input,
                         DeviceMemory<std::complex<float>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5394,15 +5315,13 @@ Stream &Stream::ThenFft(fft::Plan *plan, const DeviceMemory<double> &input,
                         DeviceMemory<std::complex<double>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5412,15 +5331,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<float> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5430,15 +5347,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<double> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5500,7 +5415,7 @@ void Stream::CheckStatus(port::Status status) {
   }
   LOG(ERROR) << status;
   absl::MutexLock lock(&mu_);
-  ok_ = false;
+  status_ = status;
 }
 
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index bf727d63da2..bfe442641ad 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -2026,7 +2026,7 @@ class Stream {
 
   bool InErrorState() const TF_LOCKS_EXCLUDED(mu_) {
     absl::ReaderMutexLock lock(&mu_);
-    return !ok_;
+    return !status_.ok();
   }
 
   // Sets the error state if operation_retcode is false.
@@ -2036,7 +2036,7 @@ class Stream {
       return;
     }
     absl::MutexLock lock(&mu_);
-    ok_ = false;
+    status_ = port::InternalError("Unknown error");
   }
 
   // Checks the status and logs the error message, if any.
@@ -2070,9 +2070,8 @@ class Stream {
   // See StreamExecutor::AllocateStream.
   bool allocated_ TF_GUARDED_BY(mu_);
 
-  // Whether all operations have entrained successfully to the current program
-  // point.
-  bool ok_ TF_GUARDED_BY(mu_);
+  // The last error (if any) of all method calls.
+  port::Status status_ TF_GUARDED_BY(mu_);
 
   // Sub-streams that are generated from this stream. Each element has a pointer
   // to sub-stream and a boolean value indicating if this substream is ready to
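
Note on the error-handling pattern above: the patch replaces Stream's boolean
`ok_` with a sticky `port::Status status_`, and drops the `if (ok())` guards so
every Then* call is still forwarded to the platform while the failing Status is
latched by CheckError/CheckStatus. The following is a minimal, self-contained
sketch of that latching behavior only -- the Status and Stream classes here are
simplified stand-ins written for illustration, not the real stream_executor
types (which use port::Status, absl::MutexLock, and LOG):

    // Sketch of the "latch the last error, never early-out" pattern.
    #include <iostream>
    #include <mutex>
    #include <string>
    #include <utility>

    // Hypothetical stand-in for port::Status: empty message means OK.
    class Status {
     public:
      Status() = default;
      explicit Status(std::string msg) : msg_(std::move(msg)) {}
      bool ok() const { return msg_.empty(); }
      const std::string &message() const { return msg_; }

     private:
      std::string msg_;
    };

    class Stream {
     public:
      // Mirrors Stream::CheckError: a false retcode latches a generic error.
      void CheckError(bool operation_retcode) {
        if (operation_retcode) {
          return;  // Success never clears an already-latched error.
        }
        std::lock_guard<std::mutex> lock(mu_);
        status_ = Status("Unknown error");
      }

      // Mirrors Stream::CheckStatus: the concrete failing Status is kept.
      void CheckStatus(Status status) {
        if (status.ok()) {
          return;
        }
        std::cerr << status.message() << '\n';  // LOG(ERROR) in the original.
        std::lock_guard<std::mutex> lock(mu_);
        status_ = std::move(status);
      }

      bool ok() const {
        std::lock_guard<std::mutex> lock(mu_);
        return status_.ok();
      }

     private:
      mutable std::mutex mu_;
      Status status_;  // Replaces `bool ok_`: the last error (if any).
    };

    int main() {
      Stream stream;
      stream.CheckStatus(Status());  // OK status: nothing is latched.
      stream.CheckStatus(Status("INTERNAL: Memset32 failed"));
      stream.CheckError(true);       // Later success does not reset the state.
      std::cout << (stream.ok() ? "ok" : "in error state") << '\n';
      return 0;
    }

Run, this prints "in error state": once a failure is recorded, ok() stays false
even though subsequent operations are still attempted, which is exactly why the
per-method `if (ok())` early-outs and their "did not ..." log lines could be
deleted throughout stream.cc.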