diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 3b3323b4f42..ac7aac9b105 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -3703,7 +3703,7 @@ port::Status CudnnSupport::DoBatchNormalizationBackwardImpl(
   return port::Status::OK();
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<double>& conv_input_data, double conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3716,18 +3716,16 @@ bool CudnnSupport::DoFusedConvolve(
     DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kDouble), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kDouble), scratch_allocator,
+      algorithm_config, output_profile_result);
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<float>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3740,18 +3738,16 @@ bool CudnnSupport::DoFusedConvolve(
     DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kFloat), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kFloat), scratch_allocator,
+      algorithm_config, output_profile_result);
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3765,18 +3761,16 @@ bool CudnnSupport::DoFusedConvolve(
     DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kHalf), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kHalf), scratch_allocator,
+      algorithm_config, output_profile_result);
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3793,23 +3787,21 @@ bool CudnnSupport::DoFusedConvolve(
   std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream);
 
   if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) {
-    LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only "
-                    "supported on GPUs with compute capability 6.1 or later.";
-    return false;
+    return port::UnimplementedError(
+        "cudnnConvolutionBiasActivationForward() for int8 is only supported on "
+        "GPUs with compute capability 6.1 or later.");
   }
 
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
+      algorithm_config, output_profile_result);
 }
 
-bool CudnnSupport::DoFusedConvolve(
+port::Status CudnnSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3826,20 +3818,18 @@ bool CudnnSupport::DoFusedConvolve(
   stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
                                                                    &cc_minor);
   if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) {
-    LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only "
-                    "supported on GPUs with compute capability 6.1 or later.";
-    return false;
+    return port::UnimplementedError(
+        "cudnnConvolutionBiasActivationForward() for int8 is only supported on "
+        "GPUs with compute capability 6.1 or later.");
   }
 
-  return IsStatusOk(
-      DoFusedConvolveImpl(
-          stream, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output_data,
-          GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
-          algorithm_config, output_profile_result),
-      /*report_error=*/!output_profile_result);
+  return DoFusedConvolveImpl(
+      stream, conv_input_descriptor, conv_input_data, conv_input_scale,
+      filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+      side_input_scale, bias_descriptor, biases, activation_mode,
+      output_descriptor, output_data,
+      GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
+      algorithm_config, output_profile_result);
 }
 
 port::Status CudnnSupport::DoPrepareForCtcLoss(
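
Note on the cuda_dnn.cc changes above: each DoFusedConvolve overload previously collapsed the port::Status returned by DoFusedConvolveImpl into a bool via IsStatusOk(), logging the message only when not profiling. Forwarding the status unchanged keeps the error text and code attached to the failure. A minimal sketch of the caller-side contract, assuming a hypothetical HandleFusedConvStatus helper that is not part of this patch:

#include "tensorflow/stream_executor/lib/status.h"
#include "tensorflow/stream_executor/platform/logging.h"

// Sketch only; HandleFusedConvStatus is hypothetical, not part of this patch.
port::Status HandleFusedConvStatus(port::Status status, bool is_profiling) {
  // Before: IsStatusOk(status, /*report_error=*/!is_profiling) returned a
  // bare bool and the message was dropped. Now the caller keeps both.
  if (!status.ok() && !is_profiling) {
    LOG(WARNING) << "fused convolution failed: " << status.error_message();
  }
  return status;
}
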
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 181502e03ee..9cab982c9a1 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -277,7 +277,7 @@ class CudnnSupport : public dnn::DnnSupport {
       dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<double>& conv_input_data, double conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -291,7 +291,7 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<float>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -305,25 +305,23 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(Stream* stream,
-                       const dnn::BatchDescriptor& conv_input_descriptor,
-                       const DeviceMemory<Eigen::half>& conv_input_data,
-                       float conv_input_scale,
-                       const dnn::FilterDescriptor& filter_descriptor,
-                       const DeviceMemory<Eigen::half>& filter_data,
-                       const dnn::ConvolutionDescriptor& convolution_descriptor,
-                       const DeviceMemory<Eigen::half>& side_input_data,
-                       float side_input_scale,
-                       const dnn::BatchDescriptor& bias_descriptor,
-                       const DeviceMemory<Eigen::half>& biases,
-                       dnn::ActivationMode activation_mode,
-                       const dnn::BatchDescriptor& output_descriptor,
-                       DeviceMemory<Eigen::half>* output_data,
-                       ScratchAllocator* scratch_allocator,
-                       const dnn::AlgorithmConfig& algorithm_config,
-                       dnn::ProfileResult* output_profile_result) override;
+  port::Status DoFusedConvolve(
+      Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
+      const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
+      const dnn::FilterDescriptor& filter_descriptor,
+      const DeviceMemory<Eigen::half>& filter_data,
+      const dnn::ConvolutionDescriptor& convolution_descriptor,
+      const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale,
+      const dnn::BatchDescriptor& bias_descriptor,
+      const DeviceMemory<Eigen::half>& biases,
+      dnn::ActivationMode activation_mode,
+      const dnn::BatchDescriptor& output_descriptor,
+      DeviceMemory<Eigen::half>* output_data,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
+      dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -337,7 +335,7 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 7b45ec2cc87..53cdff8cb7a 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -1163,7 +1163,7 @@ class DnnSupport {
   //   that if the inverse of the filter is applied to the output in VALID mode
   //   the result is the same size as the input - this requires even more
   //   padding of the input.
-  virtual bool DoFusedConvolve(
+  virtual port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<double>& conv_input_data, double conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -1176,11 +1176,12 @@ class DnnSupport {
       DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) {
-    return false;
+    return port::UnimplementedError(
+        "DnnSupport::DoFusedConvolve not implemented on this platform.");
   }
 
   // This is the float version of DoFusedConvolve.
-  virtual bool DoFusedConvolve(
+  virtual port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<float>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -1193,12 +1194,13 @@ class DnnSupport {
       DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) {
-    return false;
+    return port::UnimplementedError(
+        "DnnSupport::DoFusedConvolve not implemented on this platform.");
   }
 
   // This is the Eigen::half version of DoFusedConvolve.
   // The scaling parameters are still floats.
-  virtual bool DoFusedConvolve(
+  virtual port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -1213,12 +1215,13 @@ class DnnSupport {
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) {
-    return false;
+    return port::UnimplementedError(
+        "DnnSupport::DoFusedConvolve not implemented on this platform.");
   }
 
   // This is the int8 version of DoFusedConvolve.
   // The bias input and scaling parameters are floats.
-  virtual bool DoFusedConvolve(
+  virtual port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -1231,12 +1234,13 @@ class DnnSupport {
       DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) {
-    return false;
+    return port::UnimplementedError(
+        "DnnSupport::DoFusedConvolve not implemented on this platform.");
   }
 
   // This is the int8 version of DoFusedConvolve.
   // The output, bias input and scaling parameters are floats.
-  virtual bool DoFusedConvolve(
+  virtual port::Status DoFusedConvolve(
       Stream* /*stream*/, const dnn::BatchDescriptor& /*conv_input_descriptor*/,
       const DeviceMemory<int8>& /*conv_input_data*/, float /*conv_input_scale*/,
       const dnn::FilterDescriptor& /*filter_descriptor*/,
@@ -1252,7 +1256,8 @@ class DnnSupport {
       ScratchAllocator* /*scratch_allocator*/,
       const dnn::AlgorithmConfig& /*algorithm_config*/,
       dnn::ProfileResult* /*output_profile_result*/) {
-    return false;
+    return port::UnimplementedError(
+        "DnnSupport::DoFusedConvolve not implemented on this platform.");
   }
 
   template <typename ElementType, typename OutputType>
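
Note on the dnn.h defaults above: returning port::UnimplementedError(...) instead of false lets a caller separate "no fused path on this platform" from a genuine runtime failure. A sketch of that branching, assuming a hypothetical call site (lib/error.h aliases the canonical port::error codes):

#include "tensorflow/stream_executor/lib/error.h"   // port::error::UNIMPLEMENTED
#include "tensorflow/stream_executor/lib/status.h"  // port::Status
#include "tensorflow/stream_executor/platform/logging.h"

// Sketch only; FusedConvOrFallback is hypothetical, not part of this patch.
bool FusedConvOrFallback(const port::Status& fused_status) {
  if (fused_status.ok()) return true;  // fused kernel ran
  if (fused_status.code() == port::error::UNIMPLEMENTED) {
    // e.g. MIOpen today, or int8 on pre-6.1 GPUs: take the unfused path.
    return false;
  }
  LOG(ERROR) << fused_status;  // genuine failure, surface it
  return false;
}
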
diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.cc b/tensorflow/stream_executor/rocm/rocm_dnn.cc
index 4b2761e7658..80306105d4a 100644
--- a/tensorflow/stream_executor/rocm/rocm_dnn.cc
+++ b/tensorflow/stream_executor/rocm/rocm_dnn.cc
@@ -3680,7 +3680,7 @@ bool MIOpenSupport::DoBatchNormalizationBackwardImpl(
   return true;
 }
 
-bool MIOpenSupport::DoFusedConvolve(
+port::Status MIOpenSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<double>& conv_input_data, double conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3693,11 +3693,10 @@ bool MIOpenSupport::DoFusedConvolve(
     DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  LOG(ERROR) << "fused convolve not implemented yet";
-  return false;
+  return port::UnimplementedError("fused convolve not implemented yet");
 }
 
-bool MIOpenSupport::DoFusedConvolve(
+port::Status MIOpenSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<float>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3710,11 +3709,10 @@ bool MIOpenSupport::DoFusedConvolve(
     DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  LOG(ERROR) << "fused convolve not implemented yet";
-  return false;
+  return port::UnimplementedError("fused convolve not implemented yet");
 }
 
-bool MIOpenSupport::DoFusedConvolve(
+port::Status MIOpenSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3728,11 +3726,10 @@ bool MIOpenSupport::DoFusedConvolve(
     DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  LOG(ERROR) << "fused convolve not implemented yet";
-  return false;
+  return port::UnimplementedError("fused convolve not implemented yet");
 }
 
-bool MIOpenSupport::DoFusedConvolve(
+port::Status MIOpenSupport::DoFusedConvolve(
     Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
     const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -3745,8 +3742,7 @@ bool MIOpenSupport::DoFusedConvolve(
     DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
-  LOG(ERROR) << "fused convolve not implemented yet";
-  return false;
+  return port::UnimplementedError("fused convolve not implemented yet");
 }
 
 bool MIOpenSupport::DoTransformTensor(Stream* stream,
diff --git a/tensorflow/stream_executor/rocm/rocm_dnn.h b/tensorflow/stream_executor/rocm/rocm_dnn.h
index b01c1cc5290..654a1bf8f3a 100644
--- a/tensorflow/stream_executor/rocm/rocm_dnn.h
+++ b/tensorflow/stream_executor/rocm/rocm_dnn.h
@@ -315,7 +315,7 @@ class MIOpenSupport : public dnn::DnnSupport {
       dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<double>& conv_input_data, double conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -329,7 +329,7 @@ class MIOpenSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<float>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -343,25 +343,23 @@ class MIOpenSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(Stream* stream,
-                       const dnn::BatchDescriptor& conv_input_descriptor,
-                       const DeviceMemory<Eigen::half>& conv_input_data,
-                       float conv_input_scale,
-                       const dnn::FilterDescriptor& filter_descriptor,
-                       const DeviceMemory<Eigen::half>& filter_data,
-                       const dnn::ConvolutionDescriptor& convolution_descriptor,
-                       const DeviceMemory<Eigen::half>& side_input_data,
-                       float side_input_scale,
-                       const dnn::BatchDescriptor& bias_descriptor,
-                       const DeviceMemory<Eigen::half>& biases,
-                       dnn::ActivationMode activation_mode,
-                       const dnn::BatchDescriptor& output_descriptor,
-                       DeviceMemory<Eigen::half>* output_data,
-                       ScratchAllocator* scratch_allocator,
-                       const dnn::AlgorithmConfig& algorithm_config,
-                       dnn::ProfileResult* output_profile_result) override;
+  port::Status DoFusedConvolve(
+      Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
+      const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
+      const dnn::FilterDescriptor& filter_descriptor,
+      const DeviceMemory<Eigen::half>& filter_data,
+      const dnn::ConvolutionDescriptor& convolution_descriptor,
+      const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale,
+      const dnn::BatchDescriptor& bias_descriptor,
+      const DeviceMemory<Eigen::half>& biases,
+      dnn::ActivationMode activation_mode,
+      const dnn::BatchDescriptor& output_descriptor,
+      DeviceMemory<Eigen::half>* output_data,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
+      dnn::ProfileResult* output_profile_result) override;
 
-  bool DoFusedConvolve(
+  port::Status DoFusedConvolve(
       Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
       const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
       const dnn::FilterDescriptor& filter_descriptor,
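
Note on the stream.cc changes that follow: Stream's bool ok_ becomes a port::Status status_, so a stream now starts in a named error state (port::InternalError("Uninitialized stream")) until Init() succeeds, the per-call if (ok()) guards are dropped, and failing DNN calls record their status through CheckStatus. Profiling calls still swallow errors: the status is recorded only when output_profile_result is null. A sketch of the recording pattern, assuming a mutable mu_ and the status_ member added below; the real CheckStatus may differ:

// Sketch only: first-error-wins recording over the new status_ member.
void Stream::CheckStatus(port::Status status) {
  if (status.ok()) {
    return;
  }
  LOG(ERROR) << status;
  absl::MutexLock lock(&mu_);
  if (status_.ok()) {
    status_ = status;  // keep the first failure; later errors leave it intact
  }
}

bool Stream::ok() const {
  absl::MutexLock lock(&mu_);
  return status_.ok();  // ok() is now a view over status_
}
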
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 505d54cf5bf..62689e61be1 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -251,7 +251,7 @@ Stream::Stream(StreamExecutor *parent)
     : parent_(parent),
       implementation_(parent->implementation()->GetStreamImplementation()),
       allocated_(false),
-      ok_(false),
+      status_(port::InternalError("Uninitialized stream")),
       temporary_memory_manager_(this) {
   VLOG_CALL(PARAM(parent));
 }
@@ -261,7 +261,7 @@ Stream::Stream(StreamExecutor *parent,
     : parent_(parent),
       implementation_(implementation),
       allocated_(false),
-      ok_(false),
+      status_(port::InternalError("Uninitialized stream")),
       temporary_memory_manager_(this) {
   VLOG_CALL(PARAM(parent), PARAM(implementation));
 }
@@ -300,12 +300,12 @@ Stream &Stream::Init() {
   absl::MutexLock lock(&mu_);
   CHECK_EQ(false, allocated_)
       << "stream appears to already have been initialized";
-  CHECK(!ok_) << "stream should be in !ok() state pre-initialization";
+  CHECK(!status_.ok()) << "stream should be in !ok() state pre-initialization";
 
   if (parent_->AllocateStream(this)) {
     // Successful initialization!
     allocated_ = true;
-    ok_ = true;
+    status_ = port::Status::OK();
   } else {
     LOG(ERROR) << "failed to allocate stream during initialization";
   }
@@ -316,11 +316,7 @@ Stream &Stream::Init() {
 Stream &Stream::InitTimer(Timer *timer) {
   VLOG_CALL(PARAM(timer));
 
-  if (ok()) {
-    CheckError(parent_->AllocateTimer(timer));
-  } else {
-    LOG(INFO) << "did not allocate timer: " << timer;
-  }
+  CheckError(parent_->AllocateTimer(timer));
   return *this;
 }
 
@@ -359,17 +355,14 @@ Stream &Stream::ThenBatchNormalizationForward(
     ScratchAllocator *workspace_allocator) {
   VLOG_CALL(PARAM(x), PARAM(scale), PARAM(offset), PARAM(x_desc),
             PARAM(scale_offset_desc), PARAM(epsilon), PARAM(y));
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoBatchNormalizationForward(
-          this, x, scale, offset, estimated_mean, estimated_variance,
-          side_input, x_desc, scale_offset_desc, epsilon,
-          exponential_average_factor, activation_mode, y, batch_mean, batch_var,
-          saved_mean, saved_inv_var, is_training, reserve_space_allocator,
-          workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoBatchNormalizationForward(
+        this, x, scale, offset, estimated_mean, estimated_variance, side_input,
+        x_desc, scale_offset_desc, epsilon, exponential_average_factor,
+        activation_mode, y, batch_mean, batch_var, saved_mean, saved_inv_var,
+        is_training, reserve_space_allocator, workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -386,15 +379,13 @@ Stream &Stream::ThenBatchNormalizationBackward(
   VLOG_CALL(PARAM(y_backprop), PARAM(x), PARAM(scale), PARAM(x_desc),
             PARAM(scale_offset_desc), PARAM(epsilon), PARAM(x_backprop),
             PARAM(scale_backprop), PARAM(offset_backprop));
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoBatchNormalizationBackward(
-          this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
-          epsilon, x_backprop, scale_backprop, offset_backprop,
-          reserve_space_data, workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoBatchNormalizationBackward(
+        this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
+        epsilon, x_backprop, scale_backprop, offset_backprop,
+        reserve_space_data, workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -415,17 +406,14 @@ Stream &Stream::ThenBatchNormalizationForward(
     ScratchAllocator *workspace_allocator) {
   VLOG_CALL(PARAM(x), PARAM(scale), PARAM(offset), PARAM(x_desc),
             PARAM(scale_offset_desc), PARAM(epsilon), PARAM(y));
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoBatchNormalizationForward(
-          this, x, scale, offset, estimated_mean, estimated_variance,
-          side_input, x_desc, scale_offset_desc, epsilon,
-          exponential_average_factor, activation_mode, y, batch_mean, batch_var,
-          saved_mean, saved_inv_var, is_training, reserve_space_allocator,
-          workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoBatchNormalizationForward(
+        this, x, scale, offset, estimated_mean, estimated_variance, side_input,
+        x_desc, scale_offset_desc, epsilon, exponential_average_factor,
+        activation_mode, y, batch_mean, batch_var, saved_mean, saved_inv_var,
+        is_training, reserve_space_allocator, workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -443,16 +431,14 @@ Stream &Stream::ThenBatchNormalizationBackward(
   VLOG_CALL(PARAM(y_backprop), PARAM(x), PARAM(scale), PARAM(x_desc),
             PARAM(scale_offset_desc), PARAM(epsilon), PARAM(x_backprop),
             PARAM(scale_backprop), PARAM(offset_backprop));
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoBatchNormalizationBackward(
-          this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
-          epsilon, x_backprop, scale_backprop, offset_backprop,
-          reserve_space_data, workspace_allocator));
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoBatchNormalizationBackward(
+        this, y_backprop, x, scale, mean, inv_var, x_desc, scale_offset_desc,
+        epsilon, x_backprop, scale_backprop, offset_backprop,
+        reserve_space_data, workspace_allocator));
 
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -477,20 +463,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm(
             PARAM(activation_mode), PARAM(output_descriptor), PARAM(output),
             PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoFusedConvolve(
-          this, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output, scratch_allocator,
-          algorithm_config, output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoFusedConvolve(
+        this, conv_input_descriptor, conv_input_data, conv_input_scale,
+        filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+        side_input_scale, bias_descriptor, biases, activation_mode,
+        output_descriptor, output, scratch_allocator, algorithm_config,
+        output_profile_result);
+    if (!status.ok() && !output_profile_result) {
+      CheckStatus(status);
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -515,20 +499,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm(
             PARAM(activation_mode), PARAM(output_descriptor), PARAM(output),
             PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoFusedConvolve(
-          this, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output, scratch_allocator,
-          algorithm_config, output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoFusedConvolve(
+        this, conv_input_descriptor, conv_input_data, conv_input_scale,
+        filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+        side_input_scale, bias_descriptor, biases, activation_mode,
+        output_descriptor, output, scratch_allocator, algorithm_config,
+        output_profile_result);
+    if (!status.ok() && !output_profile_result) {
+      CheckStatus(status);
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -554,20 +536,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm(
             PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
             PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoFusedConvolve(
-          this, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output, scratch_allocator,
-          algorithm_config, output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoFusedConvolve(
+        this, conv_input_descriptor, conv_input_data, conv_input_scale,
+        filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+        side_input_scale, bias_descriptor, biases, activation_mode,
+        output_descriptor, output, scratch_allocator, algorithm_config,
+        output_profile_result);
+    if (!status.ok() && !output_profile_result) {
+      CheckStatus(status);
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -592,20 +572,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm(
             PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
             PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoFusedConvolve(
-          this, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output, scratch_allocator,
-          algorithm_config, output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoFusedConvolve(
+        this, conv_input_descriptor, conv_input_data, conv_input_scale,
+        filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+        side_input_scale, bias_descriptor, biases, activation_mode,
+        output_descriptor, output, scratch_allocator, algorithm_config,
+        output_profile_result);
+    if (!status.ok() && !output_profile_result) {
+      CheckStatus(status);
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -630,20 +608,18 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm(
             PARAM(bias_descriptor), PARAM(biases), PARAM(activation_mode),
             PARAM(output_descriptor), PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoFusedConvolve(
-          this, conv_input_descriptor, conv_input_data, conv_input_scale,
-          filter_descriptor, filter_data, convolution_descriptor,
-          side_input_data, side_input_scale, bias_descriptor, biases,
-          activation_mode, output_descriptor, output, scratch_allocator,
-          algorithm_config, output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoFusedConvolve(
+        this, conv_input_descriptor, conv_input_data, conv_input_scale,
+        filter_descriptor, filter_data, convolution_descriptor, side_input_data,
+        side_input_scale, bias_descriptor, biases, activation_mode,
+        output_descriptor, output, scratch_allocator, algorithm_config,
+        output_profile_result);
+    if (!status.ok() && !output_profile_result) {
+      CheckStatus(status);
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -663,29 +639,27 @@ Stream &Stream::ThenConvolveWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(output_descriptor),
             PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::FORWARD, this, input_descriptor,
-                 input_data, filter_descriptor, filter_data, output_descriptor,
-                 *output, convolution_descriptor, algorithm_config,
-                 scratch_allocator, &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::FORWARD, this, input_descriptor,
+               input_data, filter_descriptor, filter_data, output_descriptor,
+               *output, convolution_descriptor, algorithm_config,
+               scratch_allocator, &algorithm_desc, &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolve(
+          this, input_descriptor, input_data, filter_descriptor, filter_data,
+          convolution_descriptor, output_descriptor, output, algorithm_desc,
+          &scratch_memory, output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -705,29 +679,27 @@ Stream &Stream::ThenConvolveWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(output_descriptor),
             PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::FORWARD, this, input_descriptor,
-                 input_data, filter_descriptor, filter_data, output_descriptor,
-                 *output, convolution_descriptor, algorithm_config,
-                 scratch_allocator, &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::FORWARD, this, input_descriptor,
+               input_data, filter_descriptor, filter_data, output_descriptor,
+               *output, convolution_descriptor, algorithm_config,
+               scratch_allocator, &algorithm_desc, &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolve(
+          this, input_descriptor, input_data, filter_descriptor, filter_data,
+          convolution_descriptor, output_descriptor, output, algorithm_desc,
+          &scratch_memory, output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -747,29 +719,27 @@ Stream &Stream::ThenConvolveWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(output_descriptor),
             PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::FORWARD, this, input_descriptor,
-                 input_data, filter_descriptor, filter_data, output_descriptor,
-                 *output, convolution_descriptor, algorithm_config,
-                 scratch_allocator, &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::FORWARD, this, input_descriptor,
+               input_data, filter_descriptor, filter_data, output_descriptor,
+               *output, convolution_descriptor, algorithm_config,
+               scratch_allocator, &algorithm_desc, &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolve(
+          this, input_descriptor, input_data, filter_descriptor, filter_data,
+          convolution_descriptor, output_descriptor, output, algorithm_desc,
+          &scratch_memory, output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -789,29 +759,27 @@ Stream &Stream::ThenConvolveWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(output_descriptor),
             PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::FORWARD, this, input_descriptor,
-                 input_data, filter_descriptor, filter_data, output_descriptor,
-                 *output, convolution_descriptor, algorithm_config,
-                 scratch_allocator, &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::FORWARD, this, input_descriptor,
+               input_data, filter_descriptor, filter_data, output_descriptor,
+               *output, convolution_descriptor, algorithm_config,
+               scratch_allocator, &algorithm_desc, &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolve(
+          this, input_descriptor, input_data, filter_descriptor, filter_data,
+          convolution_descriptor, output_descriptor, output, algorithm_desc,
+          &scratch_memory, output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -831,29 +799,27 @@ Stream &Stream::ThenConvolveWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(output_descriptor),
             PARAM(output), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::FORWARD, this, input_descriptor,
-                 input_data, filter_descriptor, filter_data, output_descriptor,
-                 *output, convolution_descriptor, algorithm_config,
-                 scratch_allocator, &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::FORWARD, this, input_descriptor,
+               input_data, filter_descriptor, filter_data, output_descriptor,
+               *output, convolution_descriptor, algorithm_config,
+               scratch_allocator, &algorithm_desc, &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolve(
+          this, input_descriptor, input_data, filter_descriptor, filter_data,
+          convolution_descriptor, output_descriptor, output, algorithm_desc,
+          &scratch_memory, output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -887,18 +853,15 @@ Stream &Stream::ThenConvolveQuantized(
             PARAM(coefficient_scales), PARAM(convolution_descriptor),
             PARAM(output_descriptor), PARAM(output));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoConvolveQuantized(
-          this, input_descriptor, input_data, filter_descriptor,
-          filter_coefficients, coefficient_scales, convolution_descriptor,
-          output_descriptor, output));
-    } else {
-      SetError();
-      LOG(WARNING)
-          << "attempting to perform DNN operation using StreamExecutor "
-             "without DNN support";
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoConvolveQuantized(
+        this, input_descriptor, input_data, filter_descriptor,
+        filter_coefficients, coefficient_scales, convolution_descriptor,
+        output_descriptor, output));
+  } else {
+    SetError();
+    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
+                    "without DNN support";
   }
   return *this;
 }
@@ -917,18 +880,15 @@ Stream &Stream::ThenConvolveQuantized(
             PARAM(coefficient_scales), PARAM(convolution_descriptor),
             PARAM(output_descriptor), PARAM(output));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoConvolveQuantized(
-          this, input_descriptor, input_data, filter_descriptor,
-          filter_coefficients, coefficient_scales, convolution_descriptor,
-          output_descriptor, output));
-    } else {
-      SetError();
-      LOG(WARNING)
-          << "attempting to perform DNN operation using StreamExecutor "
-             "without DNN support";
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoConvolveQuantized(
+        this, input_descriptor, input_data, filter_descriptor,
+        filter_coefficients, coefficient_scales, convolution_descriptor,
+        output_descriptor, output));
+  } else {
+    SetError();
+    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
+                    "without DNN support";
   }
   return *this;
 }
@@ -947,15 +907,13 @@ Stream &Stream::ThenSeparableConvolve(
       PARAM(depth_multiplier), PARAM(first_weights), PARAM(second_weights),
       PARAM(convolution_descriptor), PARAM(output_descriptor), PARAM(output));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoSeparableConvolve(
-          this, batch_descriptor, input_data, filter_descriptor,
-          depth_multiplier, first_weights, second_weights,
-          convolution_descriptor, output_descriptor, output));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoSeparableConvolve(
+        this, batch_descriptor, input_data, filter_descriptor, depth_multiplier,
+        first_weights, second_weights, convolution_descriptor,
+        output_descriptor, output));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -976,31 +934,29 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(input_descriptor),
             PARAM(backward_input_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor,
-                 *backward_input_data, filter_descriptor, filter_data,
-                 output_descriptor, backward_output_data,
-                 convolution_descriptor, algorithm_config, scratch_allocator,
-                 &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolveBackwardData(
-            this, filter_descriptor, filter_data, output_descriptor,
-            backward_output_data, convolution_descriptor, input_descriptor,
-            backward_input_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor,
+               *backward_input_data, filter_descriptor, filter_data,
+               output_descriptor, backward_output_data, convolution_descriptor,
+               algorithm_config, scratch_allocator, &algorithm_desc,
+               &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolveBackwardData(
+          this, filter_descriptor, filter_data, output_descriptor,
+          backward_output_data, convolution_descriptor, input_descriptor,
+          backward_input_data, algorithm_desc, &scratch_memory,
+          output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1021,31 +977,29 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(input_descriptor),
             PARAM(backward_input_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor,
-                 *backward_input_data, filter_descriptor, filter_data,
-                 output_descriptor, backward_output_data,
-                 convolution_descriptor, algorithm_config, scratch_allocator,
-                 &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolveBackwardData(
-            this, filter_descriptor, filter_data, output_descriptor,
-            backward_output_data, convolution_descriptor, input_descriptor,
-            backward_input_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor,
+               *backward_input_data, filter_descriptor, filter_data,
+               output_descriptor, backward_output_data, convolution_descriptor,
+               algorithm_config, scratch_allocator, &algorithm_desc,
+               &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolveBackwardData(
+          this, filter_descriptor, filter_data, output_descriptor,
+          backward_output_data, convolution_descriptor, input_descriptor,
+          backward_input_data, algorithm_desc, &scratch_memory,
+          output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1066,31 +1020,29 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(input_descriptor),
             PARAM(backward_input_data), PARAM(algorithm_config));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor,
-                 *backward_input_data, filter_descriptor, filter_data,
-                 output_descriptor, backward_output_data,
-                 convolution_descriptor, algorithm_config, scratch_allocator,
-                 &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolveBackwardData(
-            this, filter_descriptor, filter_data, output_descriptor,
-            backward_output_data, convolution_descriptor, input_descriptor,
-            backward_input_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::BACKWARD_DATA, this, input_descriptor,
+               *backward_input_data, filter_descriptor, filter_data,
+               output_descriptor, backward_output_data, convolution_descriptor,
+               algorithm_config, scratch_allocator, &algorithm_desc,
+               &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolveBackwardData(
+          this, filter_descriptor, filter_data, output_descriptor,
+          backward_output_data, convolution_descriptor, input_descriptor,
+          backward_input_data, algorithm_desc, &scratch_memory,
+          output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1111,31 +1063,29 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(filter_descriptor),
             PARAM(backward_filter_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor,
-                 input_data, filter_descriptor, *backward_filter_data,
-                 output_descriptor, backward_output_data,
-                 convolution_descriptor, algorithm_config, scratch_allocator,
-                 &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolveBackwardFilter(
-            this, input_descriptor, input_data, output_descriptor,
-            backward_output_data, convolution_descriptor, filter_descriptor,
-            backward_filter_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor,
+               input_data, filter_descriptor, *backward_filter_data,
+               output_descriptor, backward_output_data, convolution_descriptor,
+               algorithm_config, scratch_allocator, &algorithm_desc,
+               &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolveBackwardFilter(
+          this, input_descriptor, input_data, output_descriptor,
+          backward_output_data, convolution_descriptor, filter_descriptor,
+          backward_filter_data, algorithm_desc, &scratch_memory,
+          output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1156,31 +1106,29 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(filter_descriptor),
             PARAM(backward_filter_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor,
-                 input_data, filter_descriptor, *backward_filter_data,
-                 output_descriptor, backward_output_data,
-                 convolution_descriptor, algorithm_config, scratch_allocator,
-                 &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolveBackwardFilter(
-            this, input_descriptor, input_data, output_descriptor,
-            backward_output_data, convolution_descriptor, filter_descriptor,
-            backward_filter_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor,
+               input_data, filter_descriptor, *backward_filter_data,
+               output_descriptor, backward_output_data, convolution_descriptor,
+               algorithm_config, scratch_allocator, &algorithm_desc,
+               &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolveBackwardFilter(
+          this, input_descriptor, input_data, output_descriptor,
+          backward_output_data, convolution_descriptor, filter_descriptor,
+          backward_filter_data, algorithm_desc, &scratch_memory,
+          output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1201,31 +1149,29 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
             PARAM(convolution_descriptor), PARAM(filter_descriptor),
             PARAM(backward_filter_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status =
-          dnn->PrepareForConvolution(
-                 dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor,
-                 input_data, filter_descriptor, *backward_filter_data,
-                 output_descriptor, backward_output_data,
-                 convolution_descriptor, algorithm_config, scratch_allocator,
-                 &algorithm_desc, &scratch_memory)
-              .ok();
-      if (status) {
-        status = dnn->DoConvolveBackwardFilter(
-            this, input_descriptor, input_data, output_descriptor,
-            backward_output_data, convolution_descriptor, filter_descriptor,
-            backward_filter_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    dnn::AlgorithmDesc algorithm_desc;
+    auto status =
+        dnn->PrepareForConvolution(
+               dnn::ConvolutionKind::BACKWARD_FILTER, this, input_descriptor,
+               input_data, filter_descriptor, *backward_filter_data,
+               output_descriptor, backward_output_data, convolution_descriptor,
+               algorithm_config, scratch_allocator, &algorithm_desc,
+               &scratch_memory)
+            .ok();
+    if (status) {
+      status = dnn->DoConvolveBackwardFilter(
+          this, input_descriptor, input_data, output_descriptor,
+          backward_output_data, convolution_descriptor, filter_descriptor,
+          backward_filter_data, algorithm_desc, &scratch_memory,
+          output_profile_result);
     }
+    if (!status && !output_profile_result) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1239,14 +1185,12 @@ Stream &Stream::ThenConvolveBackwardBiasImpl(
   VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor),
             PARAM(backward_bias_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data,
-                                             bias_descriptor,
-                                             backward_bias_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data,
+                                           bias_descriptor,
+                                           backward_bias_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1286,13 +1230,11 @@ Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
   VLOG_CALL(PARAM(input_data), PARAM(weights), PARAM(input_dimensions),
             PARAM(output_dimensions), PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions,
-                               output_dimensions, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions,
+                             output_dimensions, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
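
Note: from the ThenConvolveBackwardBiasImpl hunk above down through the quantized-memcpy hunks below, the wrappers all collapse to the same dispatch shape: call into the DNN plugin if one is attached and let `CheckError()` record a failed return code, otherwise mark the stream via `SetErrorAndLogNoDnnSupport()`. (Two of the pooling hunks below inline the warning text instead of calling the helper; the effect appears identical.) A toy model of that shape, with invented names, assuming CheckError simply latches a false return code into the stream's error bit:

#include <iostream>

struct ToyStream {
  bool ok_ = true;
  // Models Stream::CheckError: a false return code flips the error bit.
  void CheckError(bool operation_retcode) {
    if (!operation_retcode) {
      ok_ = false;
    }
  }
  void SetErrorAndLogNoDnnSupport() {
    ok_ = false;
    std::cerr << "attempting to perform DNN operation using StreamExecutor "
                 "without DNN support\n";
  }
};

struct ToyDnn {
  bool DoMatMul() { return true; }  // stands in for any Do* entry point
};

// One wrapper, post-refactor: no outer ok() gate, just dispatch-or-error.
ToyStream &ThenMatMul(ToyStream &stream, ToyDnn *dnn) {
  if (dnn != nullptr) {
    stream.CheckError(dnn->DoMatMul());
  } else {
    stream.SetErrorAndLogNoDnnSupport();
  }
  return stream;
}

int main() {
  ToyStream s;
  ThenMatMul(s, /*dnn=*/nullptr);
  std::cout << "stream ok: " << s.ok_ << "\n";  // prints 0: no DNN support
}
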
@@ -1307,14 +1249,12 @@ Stream &Stream::ThenMatMulQuantized(
             PARAM(input_dimensions), PARAM(output_dimensions),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoMatMulQuantized(this, input_data, weights,
-                                        weight_scales, input_dimensions,
-                                        output_dimensions, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoMatMulQuantized(this, input_data, weights, weight_scales,
+                                      input_dimensions, output_dimensions,
+                                      output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1329,14 +1269,12 @@ Stream &Stream::ThenMatMulQuantized(
             PARAM(input_dimensions), PARAM(output_dimensions),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoMatMulQuantized(this, input_data, weights,
-                                        weight_scales, input_dimensions,
-                                        output_dimensions, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoMatMulQuantized(this, input_data, weights, weight_scales,
+                                      input_dimensions, output_dimensions,
+                                      output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1348,13 +1286,11 @@ Stream &Stream::ThenBiasAdd(const DeviceMemory<float> &input_data,
   VLOG_CALL(PARAM(input_data), PARAM(biases), PARAM(dimensions),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(
-          dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(
+        dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1369,17 +1305,14 @@ Stream &Stream::ThenPoolForward(
             PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
-                                    input_data, output_dimensions, output_data,
-                                    workspace_allocator));
-    } else {
-      SetError();
-      LOG(WARNING)
-          << "attempting to perform DNN operation using StreamExecutor "
-             "without DNN support";
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
+                                  input_data, output_dimensions, output_data,
+                                  workspace_allocator));
+  } else {
+    SetError();
+    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
+                    "without DNN support";
   }
   return *this;
 }
@@ -1394,14 +1327,12 @@ Stream &Stream::ThenPoolForward(
             PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
-                                    input_data, output_dimensions, output_data,
-                                    workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
+                                  input_data, output_dimensions, output_data,
+                                  workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1417,14 +1348,12 @@ Stream &Stream::ThenPoolForward(
             PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
-                                    input_data, output_dimensions, output_data,
-                                    workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
+                                  input_data, output_dimensions, output_data,
+                                  workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1439,14 +1368,12 @@ Stream &Stream::ThenPoolForward(
             PARAM(input_data), PARAM(output_dimensions), PARAM(output_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
-                                    input_data, output_dimensions, output_data,
-                                    workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolForward(this, pooling_dimensions, input_dimensions,
+                                  input_data, output_dimensions, output_data,
+                                  workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1465,18 +1392,15 @@ Stream &Stream::ThenPoolBackward(
             PARAM(input_diff_data), PARAM(output_diff_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
-                                     input_data, output_dimensions, output_data,
-                                     input_diff_data, output_diff_data,
-                                     workspace_allocator));
-    } else {
-      SetError();
-      LOG(WARNING)
-          << "attempting to perform DNN operation using StreamExecutor "
-             "without DNN support";
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
+                                   input_data, output_dimensions, output_data,
+                                   input_diff_data, output_diff_data,
+                                   workspace_allocator));
+  } else {
+    SetError();
+    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
+                    "without DNN support";
   }
   return *this;
 }
@@ -1495,15 +1419,13 @@ Stream &Stream::ThenPoolBackward(
             PARAM(input_diff_data), PARAM(output_diff_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
-                                     input_data, output_dimensions, output_data,
-                                     input_diff_data, output_diff_data,
-                                     workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
+                                   input_data, output_dimensions, output_data,
+                                   input_diff_data, output_diff_data,
+                                   workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1522,15 +1444,13 @@ Stream &Stream::ThenPoolBackward(
             PARAM(input_diff_data), PARAM(output_diff_data),
             PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
-                                     input_data, output_dimensions, output_data,
-                                     input_diff_data, output_diff_data,
-                                     workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoPoolBackward(this, pooling_dimensions, input_dimensions,
+                                   input_data, output_dimensions, output_data,
+                                   input_diff_data, output_diff_data,
+                                   workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1542,13 +1462,11 @@ Stream &Stream::ThenNormalizeWithDimensions(
   VLOG_CALL(PARAM(normalize_descriptor), PARAM(dimensions), PARAM(input_data),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoNormalizeWithDimensions(
-          this, normalize_descriptor, dimensions, input_data, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoNormalizeWithDimensions(
+        this, normalize_descriptor, dimensions, input_data, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1564,15 +1482,13 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions(
             PARAM(normalized_data), PARAM(normalized_variable_gradient),
             PARAM(raw_variable_gradient), PARAM(workspace_allocator));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoNormalizeBackwardWithDimensions(
-          this, normalize_descriptor, dimensions, raw_data, normalized_data,
-          normalized_variable_gradient, raw_variable_gradient,
-          workspace_allocator));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoNormalizeBackwardWithDimensions(
+        this, normalize_descriptor, dimensions, raw_data, normalized_data,
+        normalized_variable_gradient, raw_variable_gradient,
+        workspace_allocator));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1593,13 +1509,11 @@ Stream &Stream::ThenActivateWithOptions(dnn::ActivationMode activation_mode,
   VLOG_CALL(PARAM(activation_mode), PARAM(dimensions), PARAM(input_data),
             PARAM(output_data), PARAM(options));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data,
-                                 output_data, options));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data,
+                               output_data, options));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1623,13 +1537,11 @@ Stream &Stream::ThenDepthConcatenate(
     }
   }
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data,
-                                         output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data,
+                                       output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1670,13 +1582,11 @@ Stream &Stream::ThenSpaceConcatenate(
       return *this;
     }
   }
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoSpaceConcatenate(this, input_dimensions, input_data,
-                                         output_data, concat_direction));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoSpaceConcatenate(this, input_dimensions, input_data,
+                                       output_data, concat_direction));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1688,13 +1598,11 @@ Stream &Stream::ThenReshape(const dnn::BatchDescriptor &input_dimensions,
   VLOG_CALL(PARAM(input_dimensions), PARAM(input_data),
             PARAM(output_dimensions), PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoReshape(this, input_dimensions, input_data,
-                                output_dimensions, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoReshape(this, input_dimensions, input_data,
+                              output_dimensions, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1708,14 +1616,12 @@ Stream &Stream::ThenDepthToSpace(
             PARAM(depth_to_space_layout), PARAM(sqrt_depth_reduction),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoDepthToSpace(this, input_dimensions, input_data,
-                                     depth_to_space_layout,
-                                     sqrt_depth_reduction, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoDepthToSpace(this, input_dimensions, input_data,
+                                   depth_to_space_layout, sqrt_depth_reduction,
+                                   output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1729,14 +1635,12 @@ Stream &Stream::ThenSpaceToDepth(
             PARAM(space_to_depth_layout), PARAM(sqrt_depth_increase),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoSpaceToDepth(this, input_dimensions, input_data,
-                                     space_to_depth_layout, sqrt_depth_increase,
-                                     output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoSpaceToDepth(this, input_dimensions, input_data,
+                                   space_to_depth_layout, sqrt_depth_increase,
+                                   output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1750,14 +1654,12 @@ Stream &Stream::ThenElementwiseOperate(
   VLOG_CALL(PARAM(operation), PARAM(input_dimensions), PARAM(input_data),
             PARAM(output_dimensions), PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoElementwiseOperate(this, operation, input_dimensions,
-                                           input_data, output_dimensions,
-                                           output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoElementwiseOperate(this, operation, input_dimensions,
+                                         input_data, output_dimensions,
+                                         output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1773,14 +1675,12 @@ Stream &Stream::ThenElementwiseOperateScaledQuantized(
             PARAM(input_dimensions), PARAM(input_data),
             PARAM(output_dimensions), PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoElementwiseOperateScaledQuantized(
-          this, operation, input_multiplicands, output_divisor,
-          input_dimensions, input_data, output_dimensions, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoElementwiseOperateScaledQuantized(
+        this, operation, input_multiplicands, output_divisor, input_dimensions,
+        input_data, output_dimensions, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1793,13 +1693,11 @@ Stream &Stream::ThenXYPad(const dnn::BatchDescriptor &dimensions,
             PARAM(right_pad), PARAM(top_pad), PARAM(bottom_pad),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad,
-                              top_pad, bottom_pad, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad,
+                            top_pad, bottom_pad, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1813,14 +1711,11 @@ Stream &Stream::ThenXYSlice(const dnn::BatchDescriptor &dimensions,
             PARAM(right_trim), PARAM(top_trim), PARAM(bottom_trim),
             PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoXYSlice(this, dimensions, input_data, left_trim,
-                                right_trim, top_trim, bottom_trim,
-                                output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoXYSlice(this, dimensions, input_data, left_trim,
+                              right_trim, top_trim, bottom_trim, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1832,13 +1727,11 @@ Stream &Stream::ThenXYBroadcast(const dnn::BatchDescriptor &dimensions,
   VLOG_CALL(PARAM(dimensions), PARAM(input_data), PARAM(replicate_x),
             PARAM(replicate_y), PARAM(output_data));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoXYBroadcast(this, dimensions, input_data, replicate_x,
-                                    replicate_y, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoXYBroadcast(this, dimensions, input_data, replicate_x,
+                                  replicate_y, output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1849,13 +1742,11 @@ Stream &Stream::ThenMemcpyD2HQuantized(
   VLOG_CALL(PARAM(gpu_unquantized_src), PARAM(mode), PARAM(host_dst),
             PARAM(size));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode,
-                                           host_dst, size));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode,
+                                         host_dst, size));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1866,13 +1757,11 @@ Stream &Stream::ThenMemcpyH2DQuantized(
   VLOG_CALL(PARAM(host_src), PARAM(size), PARAM(mode),
             PARAM(gpu_unquantized_dst));
 
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode,
-                                           gpu_unquantized_dst));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode,
+                                         gpu_unquantized_dst));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -1920,7 +1809,7 @@ Stream *Stream::GetOrCreateSubStream() {
                             false);
   Stream *sub_stream = sub_streams_.back().first.get();
   sub_stream->Init();
-  if (!sub_stream->ok_) {
+  if (!sub_stream->ok()) {
     LOG(ERROR) << "sub-stream failed to be initialized";
   }
   VLOG(1) << DebugStreamPointers() << " created new sub_stream "
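
The small change in this hunk (`ok_` → `ok()`) swaps a direct read of the error flag for the public accessor. In a class like this the accessor typically guards the flag with the stream's mutex, so the sketch below shows why the raw read is worth avoiding; the lock and field names here are assumptions, not the real implementation:

#include <mutex>

class ToyStream {
 public:
  // Thread-safe query, analogous in spirit to Stream::ok().
  bool ok() const {
    std::lock_guard<std::mutex> lock(mu_);
    return ok_;
  }
  void SetError() {
    std::lock_guard<std::mutex> lock(mu_);
    ok_ = false;
  }

 private:
  mutable std::mutex mu_;
  bool ok_ = true;  // reading this directly from another thread is a data race
};

int main() {
  ToyStream s;
  return s.ok() ? 0 : 1;
}
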
@@ -1972,24 +1861,14 @@ void Stream::ReturnSubStream(Stream *sub_stream) {
 Stream &Stream::ThenStartTimer(Timer *t) {
   VLOG_CALL(PARAM(t));
 
-  if (ok()) {
-    CheckError(parent_->StartTimer(this, t));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not enqueue 'start timer': " << t;
-  }
+  CheckError(parent_->StartTimer(this, t));
   return *this;
 }
 
 Stream &Stream::ThenStopTimer(Timer *t) {
   VLOG_CALL(PARAM(t));
 
-  if (ok()) {
-    CheckError(parent_->StopTimer(this, t));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not enqueue 'stop timer': " << t;
-  }
+  CheckError(parent_->StopTimer(this, t));
   return *this;
 }
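
ThenStartTimer and ThenStopTimer lose their ok() gate too, with a behavioral wrinkle: an errored stream used to skip the timer entirely and log at INFO, whereas now the enqueue is always attempted and any failure is recorded through CheckError. A before/after contrast in miniature (all names hypothetical):

#include <iostream>

struct ToyStream {
  bool ok_ = true;
  void CheckError(bool rc) {
    if (!rc) {
      ok_ = false;
    }
  }
};

bool StartTimer(ToyStream &) { return true; }  // stands in for parent_->StartTimer

// Before: silently skip on an errored stream; the skip is only visible at INFO.
void ThenStartTimerOld(ToyStream &s) {
  if (s.ok_) {
    s.CheckError(StartTimer(s));
  } else {
    std::cout << "did not enqueue 'start timer'\n";
  }
}

// After: always attempt the enqueue; CheckError keeps the error bit accurate.
void ThenStartTimerNew(ToyStream &s) { s.CheckError(StartTimer(s)); }

int main() {
  ToyStream s;
  ThenStartTimerNew(s);
  std::cout << "stream ok: " << s.ok_ << "\n";
}
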
 
@@ -2079,7 +1958,8 @@ Stream &Stream::ThenBlasAsum(uint64 elem_count, const DeviceMemory<double> &x,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *> impl;
+               DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx,
               result);
 }
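
Everything from here down is whitespace-only: clang-format now breaks a declaration whose template-id overflows the line after the closing '>', continuation-indenting the declarator (`impl;`) onto its own line. The same reflow shown on a generic standard-library type, so it can be compiled in isolation:

#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Old layout: the variable name shares a line with the closing '>'.
  std::map<std::string, std::vector<std::pair<int, double>>> old_style_decl;

  // New layout adopted throughout the BLAS hunks below: the template-id ends
  // the line and the declarator is continuation-indented on its own line.
  std::map<std::string,
           std::vector<std::pair<int, double>>>
      new_style_decl;

  (void)old_style_decl;
  (void)new_style_decl;
  return 0;
}
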
@@ -2090,7 +1970,8 @@ Stream &Stream::ThenBlasAsum(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<float> *> impl;
+               DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx,
               result);
 }
@@ -2101,7 +1982,8 @@ Stream &Stream::ThenBlasAsum(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<double> *> impl;
+               DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAsum, elem_count, x, incx,
               result);
 }
@@ -2113,7 +1995,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, float alpha,
             PARAM(incy));
 
   ThenBlasImpl<uint64, float, const DeviceMemory<float> &, int,
-               DeviceMemory<float> *, int> impl;
+               DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx,
               y, incy);
 }
@@ -2125,7 +2008,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, double alpha,
             PARAM(incy));
 
   ThenBlasImpl<uint64, double, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx,
               y, incy);
 }
@@ -2139,7 +2023,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, std::complex<float> alpha,
 
   ThenBlasImpl<uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx,
               y, incy);
 }
@@ -2153,7 +2038,8 @@ Stream &Stream::ThenBlasAxpy(uint64 elem_count, std::complex<double> alpha,
 
   ThenBlasImpl<uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasAxpy, elem_count, alpha, x, incx,
               y, incy);
 }
@@ -2163,7 +2049,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count, const DeviceMemory<float> &x,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy));
 
   ThenBlasImpl<uint64, const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y,
               incy);
 }
@@ -2173,7 +2060,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count, const DeviceMemory<double> &x,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy));
 
   ThenBlasImpl<uint64, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y,
               incy);
 }
@@ -2185,7 +2073,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y,
               incy);
 }
@@ -2197,7 +2086,8 @@ Stream &Stream::ThenBlasCopy(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasCopy, elem_count, x, incx, y,
               incy);
 }
@@ -2209,7 +2099,8 @@ Stream &Stream::ThenBlasDot(uint64 elem_count, const DeviceMemory<float> &x,
             PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<float> &, int,
-               const DeviceMemory<float> &, int, DeviceMemory<float> *> impl;
+               const DeviceMemory<float> &, int, DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasDot, elem_count, x, incx, y, incy,
               result);
 }
@@ -2221,7 +2112,8 @@ Stream &Stream::ThenBlasDot(uint64 elem_count, const DeviceMemory<double> &x,
             PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<double> &, int,
-               const DeviceMemory<double> &, int, DeviceMemory<double> *> impl;
+               const DeviceMemory<double> &, int, DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasDot, elem_count, x, incx, y, incy,
               result);
 }
@@ -2237,7 +2129,8 @@ Stream &Stream::ThenBlasDotc(uint64 elem_count,
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *> impl;
+               DeviceMemory<std::complex<float>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasDotc, elem_count, x, incx, y,
               incy, result);
 }
@@ -2253,7 +2146,8 @@ Stream &Stream::ThenBlasDotc(uint64 elem_count,
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *> impl;
+               DeviceMemory<std::complex<double>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasDotc, elem_count, x, incx, y,
               incy, result);
 }
@@ -2269,7 +2163,8 @@ Stream &Stream::ThenBlasDotu(uint64 elem_count,
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *> impl;
+               DeviceMemory<std::complex<float>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasDotu, elem_count, x, incx, y,
               incy, result);
 }
@@ -2285,7 +2180,8 @@ Stream &Stream::ThenBlasDotu(uint64 elem_count,
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *> impl;
+               DeviceMemory<std::complex<double>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasDotu, elem_count, x, incx, y,
               incy, result);
 }
@@ -2305,7 +2201,8 @@ Stream &Stream::ThenBlasNrm2(uint64 elem_count, const DeviceMemory<double> &x,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *> impl;
+               DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx,
               result);
 }
@@ -2316,7 +2213,8 @@ Stream &Stream::ThenBlasNrm2(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<float> *> impl;
+               DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx,
               result);
 }
@@ -2327,7 +2225,8 @@ Stream &Stream::ThenBlasNrm2(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<double> *> impl;
+               DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasNrm2, elem_count, x, incx,
               result);
 }
@@ -2339,7 +2238,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count, DeviceMemory<float> *x, int incx,
             PARAM(c), PARAM(s));
 
   ThenBlasImpl<uint64, DeviceMemory<float> *, int, DeviceMemory<float> *, int,
-               float, float> impl;
+               float, float>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy,
               c, s);
 }
@@ -2351,7 +2251,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count, DeviceMemory<double> *x,
             PARAM(c), PARAM(s));
 
   ThenBlasImpl<uint64, DeviceMemory<double> *, int, DeviceMemory<double> *, int,
-               double, double> impl;
+               double, double>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy,
               c, s);
 }
@@ -2364,7 +2265,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count,
             PARAM(c), PARAM(s));
 
   ThenBlasImpl<uint64, DeviceMemory<std::complex<float>> *, int,
-               DeviceMemory<std::complex<float>> *, int, float, float> impl;
+               DeviceMemory<std::complex<float>> *, int, float, float>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy,
               c, s);
 }
@@ -2377,7 +2279,8 @@ Stream &Stream::ThenBlasRot(uint64 elem_count,
             PARAM(c), PARAM(s));
 
   ThenBlasImpl<uint64, DeviceMemory<std::complex<double>> *, int,
-               DeviceMemory<std::complex<double>> *, int, double, double> impl;
+               DeviceMemory<std::complex<double>> *, int, double, double>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRot, elem_count, x, incx, y, incy,
               c, s);
 }
@@ -2387,7 +2290,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory<float> *a, DeviceMemory<float> *b,
   VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s));
 
   ThenBlasImpl<DeviceMemory<float> *, DeviceMemory<float> *,
-               DeviceMemory<float> *, DeviceMemory<float> *> impl;
+               DeviceMemory<float> *, DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s);
 }
 
@@ -2396,7 +2300,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory<double> *a, DeviceMemory<double> *b,
   VLOG_CALL(PARAM(a), PARAM(b), PARAM(c), PARAM(s));
 
   ThenBlasImpl<DeviceMemory<double> *, DeviceMemory<double> *,
-               DeviceMemory<double> *, DeviceMemory<double> *> impl;
+               DeviceMemory<double> *, DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s);
 }
 
@@ -2408,7 +2313,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory<std::complex<float>> *a,
 
   ThenBlasImpl<DeviceMemory<std::complex<float>> *,
                DeviceMemory<std::complex<float>> *, DeviceMemory<float> *,
-               DeviceMemory<std::complex<float>> *> impl;
+               DeviceMemory<std::complex<float>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s);
 }
 
@@ -2420,7 +2326,8 @@ Stream &Stream::ThenBlasRotg(DeviceMemory<std::complex<double>> *a,
 
   ThenBlasImpl<DeviceMemory<std::complex<double>> *,
                DeviceMemory<std::complex<double>> *, DeviceMemory<double> *,
-               DeviceMemory<std::complex<double>> *> impl;
+               DeviceMemory<std::complex<double>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotg, a, b, c, s);
 }
 
@@ -2431,7 +2338,8 @@ Stream &Stream::ThenBlasRotm(uint64 elem_count, DeviceMemory<float> *x,
             PARAM(param));
 
   ThenBlasImpl<uint64, DeviceMemory<float> *, int, DeviceMemory<float> *, int,
-               const DeviceMemory<float> &> impl;
+               const DeviceMemory<float> &>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotm, elem_count, x, incx, y,
               incy, param);
 }
@@ -2443,7 +2351,8 @@ Stream &Stream::ThenBlasRotm(uint64 elem_count, DeviceMemory<double> *x,
             PARAM(param));
 
   ThenBlasImpl<uint64, DeviceMemory<double> *, int, DeviceMemory<double> *, int,
-               const DeviceMemory<double> &> impl;
+               const DeviceMemory<double> &>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotm, elem_count, x, incx, y,
               incy, param);
 }
@@ -2456,7 +2365,8 @@ Stream &Stream::ThenBlasRotmg(DeviceMemory<float> *d1, DeviceMemory<float> *d2,
 
   ThenBlasImpl<DeviceMemory<float> *, DeviceMemory<float> *,
                DeviceMemory<float> *, const DeviceMemory<float> &,
-               DeviceMemory<float> *> impl;
+               DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotmg, d1, d2, x1, y1, param);
 }
 
@@ -2469,7 +2379,8 @@ Stream &Stream::ThenBlasRotmg(DeviceMemory<double> *d1,
 
   ThenBlasImpl<DeviceMemory<double> *, DeviceMemory<double> *,
                DeviceMemory<double> *, const DeviceMemory<double> &,
-               DeviceMemory<double> *> impl;
+               DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasRotmg, d1, d2, x1, y1, param);
 }
 
@@ -2510,7 +2421,8 @@ Stream &Stream::ThenBlasScal(uint64 elem_count, std::complex<float> alpha,
   VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx));
 
   ThenBlasImpl<uint64, std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx);
 }
 
@@ -2519,7 +2431,8 @@ Stream &Stream::ThenBlasScal(uint64 elem_count, std::complex<double> alpha,
   VLOG_CALL(PARAM(elem_count), PARAM(alpha), PARAM(x), PARAM(incx));
 
   ThenBlasImpl<uint64, std::complex<double>,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasScal, elem_count, alpha, x, incx);
 }
 
@@ -2549,7 +2462,8 @@ Stream &Stream::ThenBlasSwap(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy));
 
   ThenBlasImpl<uint64, DeviceMemory<std::complex<float>> *, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y,
               incy);
 }
@@ -2560,7 +2474,8 @@ Stream &Stream::ThenBlasSwap(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(y), PARAM(incy));
 
   ThenBlasImpl<uint64, DeviceMemory<std::complex<double>> *, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSwap, elem_count, x, incx, y,
               incy);
 }
@@ -2591,7 +2506,8 @@ Stream &Stream::ThenBlasIamax(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<int> *> impl;
+               DeviceMemory<int> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx,
               result);
 }
@@ -2602,7 +2518,8 @@ Stream &Stream::ThenBlasIamax(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<int> *> impl;
+               DeviceMemory<int> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasIamax, elem_count, x, incx,
               result);
 }
@@ -2633,7 +2550,8 @@ Stream &Stream::ThenBlasIamin(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<int> *> impl;
+               DeviceMemory<int> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx,
               result);
 }
@@ -2644,7 +2562,8 @@ Stream &Stream::ThenBlasIamin(uint64 elem_count,
   VLOG_CALL(PARAM(elem_count), PARAM(x), PARAM(incx), PARAM(result));
 
   ThenBlasImpl<uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<int> *> impl;
+               DeviceMemory<int> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasIamin, elem_count, x, incx,
               result);
 }
@@ -2660,7 +2579,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n,
 
   ThenBlasImpl<blas::Transpose, uint64, uint64, uint64, uint64, float,
                const DeviceMemory<float> &, int, const DeviceMemory<float> &,
-               int, float, DeviceMemory<float> *, int> impl;
+               int, float, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha,
               a, lda, x, incx, beta, y, incy);
 }
@@ -2676,7 +2596,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n,
 
   ThenBlasImpl<blas::Transpose, uint64, uint64, uint64, uint64, double,
                const DeviceMemory<double> &, int, const DeviceMemory<double> &,
-               int, double, DeviceMemory<double> *, int> impl;
+               int, double, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha,
               a, lda, x, incx, beta, y, incy);
 }
@@ -2695,8 +2616,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n,
   ThenBlasImpl<blas::Transpose, uint64, uint64, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha,
               a, lda, x, incx, beta, y, incy);
 }
@@ -2715,8 +2636,8 @@ Stream &Stream::ThenBlasGbmv(blas::Transpose trans, uint64 m, uint64 n,
   ThenBlasImpl<blas::Transpose, uint64, uint64, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGbmv, trans, m, n, kl, ku, alpha,
               a, lda, x, incx, beta, y, incy);
 }
@@ -2731,7 +2652,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n,
 
   ThenBlasImpl<blas::Transpose, uint64, uint64, float,
                const DeviceMemory<float> &, int, const DeviceMemory<float> &,
-               int, float, DeviceMemory<float> *, int> impl;
+               int, float, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -2746,7 +2668,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n,
 
   ThenBlasImpl<blas::Transpose, uint64, uint64, double,
                const DeviceMemory<double> &, int, const DeviceMemory<double> &,
-               int, double, DeviceMemory<double> *, int> impl;
+               int, double, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -2765,8 +2688,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n,
   ThenBlasImpl<blas::Transpose, uint64, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -2785,8 +2708,8 @@ Stream &Stream::ThenBlasGemv(blas::Transpose trans, uint64 m, uint64 n,
   ThenBlasImpl<blas::Transpose, uint64, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemv, trans, m, n, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -2799,8 +2722,8 @@ Stream &Stream::ThenBlasGer(uint64 m, uint64 n, float alpha,
             PARAM(incy), PARAM(a), PARAM(lda));
 
   ThenBlasImpl<uint64, uint64, float, const DeviceMemory<float> &, int,
-               const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               const DeviceMemory<float> &, int, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGer, m, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -2813,8 +2736,8 @@ Stream &Stream::ThenBlasGer(uint64 m, uint64 n, double alpha,
             PARAM(incy), PARAM(a), PARAM(lda));
 
   ThenBlasImpl<uint64, uint64, double, const DeviceMemory<double> &, int,
-               const DeviceMemory<double> &, int, DeviceMemory<double> *,
-               int> impl;
+               const DeviceMemory<double> &, int, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGer, m, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -2831,7 +2754,8 @@ Stream &Stream::ThenBlasGerc(uint64 m, uint64 n, std::complex<float> alpha,
   ThenBlasImpl<uint64, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGerc, m, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -2848,7 +2772,8 @@ Stream &Stream::ThenBlasGerc(uint64 m, uint64 n, std::complex<double> alpha,
   ThenBlasImpl<uint64, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGerc, m, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -2865,7 +2790,8 @@ Stream &Stream::ThenBlasGeru(uint64 m, uint64 n, std::complex<float> alpha,
   ThenBlasImpl<uint64, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGeru, m, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -2882,7 +2808,8 @@ Stream &Stream::ThenBlasGeru(uint64 m, uint64 n, std::complex<double> alpha,
   ThenBlasImpl<uint64, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGeru, m, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -2900,8 +2827,8 @@ Stream &Stream::ThenBlasHbmv(blas::UpperLower uplo, uint64 n, uint64 k,
   ThenBlasImpl<blas::UpperLower, uint64, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHbmv, uplo, n, k, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -2919,8 +2846,8 @@ Stream &Stream::ThenBlasHbmv(blas::UpperLower uplo, uint64 n, uint64 k,
   ThenBlasImpl<blas::UpperLower, uint64, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHbmv, uplo, n, k, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -2938,8 +2865,8 @@ Stream &Stream::ThenBlasHemv(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHemv, uplo, n, alpha, a, lda, x,
               incx, beta, y, incy);
 }
@@ -2957,8 +2884,8 @@ Stream &Stream::ThenBlasHemv(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHemv, uplo, n, alpha, a, lda, x,
               incx, beta, y, incy);
 }
@@ -2972,7 +2899,8 @@ Stream &Stream::ThenBlasHer(blas::UpperLower uplo, uint64 n, float alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, float,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHer, uplo, n, alpha, x, incx, a,
               lda);
 }
@@ -2986,7 +2914,8 @@ Stream &Stream::ThenBlasHer(blas::UpperLower uplo, uint64 n, double alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, double,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHer, uplo, n, alpha, x, incx, a,
               lda);
 }
@@ -3004,7 +2933,8 @@ Stream &Stream::ThenBlasHer2(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHer2, uplo, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -3022,7 +2952,8 @@ Stream &Stream::ThenBlasHer2(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHer2, uplo, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -3039,8 +2970,8 @@ Stream &Stream::ThenBlasHpmv(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &,
                const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHpmv, uplo, n, alpha, ap, x, incx,
               beta, y, incy);
 }
@@ -3057,8 +2988,8 @@ Stream &Stream::ThenBlasHpmv(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &,
                const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHpmv, uplo, n, alpha, ap, x, incx,
               beta, y, incy);
 }
@@ -3071,7 +3002,8 @@ Stream &Stream::ThenBlasHpr(blas::UpperLower uplo, uint64 n, float alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, float,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *> impl;
+               DeviceMemory<std::complex<float>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHpr, uplo, n, alpha, x, incx, ap);
 }
 
@@ -3083,7 +3015,8 @@ Stream &Stream::ThenBlasHpr(blas::UpperLower uplo, uint64 n, double alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, double,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *> impl;
+               DeviceMemory<std::complex<double>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHpr, uplo, n, alpha, x, incx, ap);
 }
 
@@ -3099,7 +3032,8 @@ Stream &Stream::ThenBlasHpr2(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *> impl;
+               DeviceMemory<std::complex<float>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHpr2, uplo, n, alpha, x, incx, y,
               incy, ap);
 }
@@ -3116,7 +3050,8 @@ Stream &Stream::ThenBlasHpr2(blas::UpperLower uplo, uint64 n,
   ThenBlasImpl<blas::UpperLower, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *> impl;
+               DeviceMemory<std::complex<double>> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHpr2, uplo, n, alpha, x, incx, y,
               incy, ap);
 }
@@ -3130,7 +3065,8 @@ Stream &Stream::ThenBlasSbmv(blas::UpperLower uplo, uint64 n, uint64 k,
 
   ThenBlasImpl<blas::UpperLower, uint64, uint64, float,
                const DeviceMemory<float> &, int, const DeviceMemory<float> &,
-               int, float, DeviceMemory<float> *, int> impl;
+               int, float, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSbmv, uplo, n, k, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -3144,7 +3080,8 @@ Stream &Stream::ThenBlasSbmv(blas::UpperLower uplo, uint64 n, uint64 k,
 
   ThenBlasImpl<blas::UpperLower, uint64, uint64, double,
                const DeviceMemory<double> &, int, const DeviceMemory<double> &,
-               int, double, DeviceMemory<double> *, int> impl;
+               int, double, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSbmv, uplo, n, k, alpha, a, lda,
               x, incx, beta, y, incy);
 }
@@ -3158,7 +3095,8 @@ Stream &Stream::ThenBlasSpmv(blas::UpperLower uplo, uint64 n, float alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, float, const DeviceMemory<float> &,
                const DeviceMemory<float> &, int, float, DeviceMemory<float> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSpmv, uplo, n, alpha, ap, x, incx,
               beta, y, incy);
 }
@@ -3172,7 +3110,8 @@ Stream &Stream::ThenBlasSpmv(blas::UpperLower uplo, uint64 n, double alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, double, const DeviceMemory<double> &,
                const DeviceMemory<double> &, int, double,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSpmv, uplo, n, alpha, ap, x, incx,
               beta, y, incy);
 }
@@ -3184,7 +3123,8 @@ Stream &Stream::ThenBlasSpr(blas::UpperLower uplo, uint64 n, float alpha,
             PARAM(ap));
 
   ThenBlasImpl<blas::UpperLower, uint64, float, const DeviceMemory<float> &,
-               int, DeviceMemory<float> *> impl;
+               int, DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSpr, uplo, n, alpha, x, incx, ap);
 }
 
@@ -3195,7 +3135,8 @@ Stream &Stream::ThenBlasSpr(blas::UpperLower uplo, uint64 n, double alpha,
             PARAM(ap));
 
   ThenBlasImpl<blas::UpperLower, uint64, double, const DeviceMemory<double> &,
-               int, DeviceMemory<double> *> impl;
+               int, DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSpr, uplo, n, alpha, x, incx, ap);
 }
 
@@ -3207,8 +3148,8 @@ Stream &Stream::ThenBlasSpr2(blas::UpperLower uplo, uint64 n, float alpha,
             PARAM(y), PARAM(incy), PARAM(ap));
 
   ThenBlasImpl<blas::UpperLower, uint64, float, const DeviceMemory<float> &,
-               int, const DeviceMemory<float> &, int,
-               DeviceMemory<float> *> impl;
+               int, const DeviceMemory<float> &, int, DeviceMemory<float> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSpr2, uplo, n, alpha, x, incx, y,
               incy, ap);
 }
@@ -3221,8 +3162,8 @@ Stream &Stream::ThenBlasSpr2(blas::UpperLower uplo, uint64 n, double alpha,
             PARAM(y), PARAM(incy), PARAM(ap));
 
   ThenBlasImpl<blas::UpperLower, uint64, double, const DeviceMemory<double> &,
-               int, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *> impl;
+               int, const DeviceMemory<double> &, int, DeviceMemory<double> *>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSpr2, uplo, n, alpha, x, incx, y,
               incy, ap);
 }
@@ -3236,7 +3177,8 @@ Stream &Stream::ThenBlasSymv(blas::UpperLower uplo, uint64 n, float alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, float, const DeviceMemory<float> &,
                int, const DeviceMemory<float> &, int, float,
-               DeviceMemory<float> *, int> impl;
+               DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSymv, uplo, n, alpha, a, lda, x,
               incx, beta, y, incy);
 }
@@ -3250,7 +3192,8 @@ Stream &Stream::ThenBlasSymv(blas::UpperLower uplo, uint64 n, double alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, double, const DeviceMemory<double> &,
                int, const DeviceMemory<double> &, int, double,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSymv, uplo, n, alpha, a, lda, x,
               incx, beta, y, incy);
 }
@@ -3262,7 +3205,8 @@ Stream &Stream::ThenBlasSyr(blas::UpperLower uplo, uint64 n, float alpha,
             PARAM(a), PARAM(lda));
 
   ThenBlasImpl<blas::UpperLower, uint64, float, const DeviceMemory<float> &,
-               int, DeviceMemory<float> *, int> impl;
+               int, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr, uplo, n, alpha, x, incx, a,
               lda);
 }
@@ -3274,7 +3218,8 @@ Stream &Stream::ThenBlasSyr(blas::UpperLower uplo, uint64 n, double alpha,
             PARAM(a), PARAM(lda));
 
   ThenBlasImpl<blas::UpperLower, uint64, double, const DeviceMemory<double> &,
-               int, DeviceMemory<double> *, int> impl;
+               int, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr, uplo, n, alpha, x, incx, a,
               lda);
 }
@@ -3288,7 +3233,8 @@ Stream &Stream::ThenBlasSyr2(blas::UpperLower uplo, uint64 n, float alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, float, const DeviceMemory<float> &,
                int, const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr2, uplo, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -3302,7 +3248,8 @@ Stream &Stream::ThenBlasSyr2(blas::UpperLower uplo, uint64 n, double alpha,
 
   ThenBlasImpl<blas::UpperLower, uint64, double, const DeviceMemory<double> &,
                int, const DeviceMemory<double> &, int, DeviceMemory<double> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr2, uplo, n, alpha, x, incx, y,
               incy, a, lda);
 }
@@ -3316,7 +3263,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3330,7 +3278,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3345,7 +3294,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3360,7 +3310,8 @@ Stream &Stream::ThenBlasTbmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbmv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3374,7 +3325,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3388,7 +3340,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3403,7 +3356,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3418,7 +3372,8 @@ Stream &Stream::ThenBlasTbsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                uint64, const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTbsv, uplo, trans, diag, n, k, a,
               lda, x, incx);
 }
@@ -3431,7 +3386,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<float> &, DeviceMemory<float> *, int> impl;
+               const DeviceMemory<float> &, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3444,7 +3400,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<double> &, DeviceMemory<double> *, int> impl;
+               const DeviceMemory<double> &, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3458,7 +3415,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<float>> &,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3472,7 +3430,8 @@ Stream &Stream::ThenBlasTpmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<double>> &,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpmv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3485,7 +3444,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<float> &, DeviceMemory<float> *, int> impl;
+               const DeviceMemory<float> &, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3498,7 +3458,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<double> &, DeviceMemory<double> *, int> impl;
+               const DeviceMemory<double> &, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3512,7 +3473,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<float>> &,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3526,7 +3488,8 @@ Stream &Stream::ThenBlasTpsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<double>> &,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTpsv, uplo, trans, diag, n, ap, x,
               incx);
 }
@@ -3539,8 +3502,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(lda), PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               const DeviceMemory<float> &, int, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3553,8 +3516,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(lda), PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<double> &, int, DeviceMemory<double> *,
-               int> impl;
+               const DeviceMemory<double> &, int, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3569,7 +3532,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3584,7 +3548,8 @@ Stream &Stream::ThenBlasTrmv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3597,8 +3562,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(lda), PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<float> &, int, DeviceMemory<float> *,
-               int> impl;
+               const DeviceMemory<float> &, int, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3611,8 +3576,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans,
             PARAM(lda), PARAM(x), PARAM(incx));
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
-               const DeviceMemory<double> &, int, DeviceMemory<double> *,
-               int> impl;
+               const DeviceMemory<double> &, int, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3627,7 +3592,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3642,7 +3608,8 @@ Stream &Stream::ThenBlasTrsv(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, blas::Diagonal, uint64,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsv, uplo, trans, diag, n, a,
               lda, x, incx);
 }
@@ -3651,16 +3618,17 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
                              uint64 m, uint64 n, uint64 k, float alpha,
                              const DeviceMemory<Eigen::half> &a, int lda,
                              const DeviceMemory<Eigen::half> &b, int ldb,
-                             float beta,
-                             DeviceMemory<Eigen::half> *c, int ldc) {
+                             float beta, DeviceMemory<Eigen::half> *c,
+                             int ldc) {
   VLOG_CALL(PARAM(transa), PARAM(transb), PARAM(m), PARAM(n), PARAM(k),
             PARAM(alpha), PARAM(a), PARAM(lda), PARAM(b), PARAM(ldb),
             PARAM(beta), PARAM(c), PARAM(ldc));
 
   ThenBlasImpl<blas::Transpose, blas::Transpose, uint64, uint64, uint64, float,
                const DeviceMemory<Eigen::half> &, int,
-               const DeviceMemory<Eigen::half> &, int,
-               float, DeviceMemory<Eigen::half> *, int> impl;
+               const DeviceMemory<Eigen::half> &, int, float,
+               DeviceMemory<Eigen::half> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k,
               alpha, a, lda, b, ldb, beta, c, ldc);
 }
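
For orientation on the GEMM overloads being reflowed here, a hypothetical call site follows; the buffer names, dimensions, and leading-dimension choices are illustrative only and not taken from this diff.

// Computes c = 1.0f * a * b + 0.0f * c for column-major m x k and k x n
// operands already resident on the device.
void LaunchGemm(Stream *stream, const DeviceMemory<float> &a,
                const DeviceMemory<float> &b, DeviceMemory<float> *c,
                uint64 m, uint64 n, uint64 k) {
  stream->ThenBlasGemm(blas::Transpose::kNoTranspose,
                       blas::Transpose::kNoTranspose, m, n, k,
                       /*alpha=*/1.0f, a, /*lda=*/static_cast<int>(m), b,
                       /*ldb=*/static_cast<int>(k),
                       /*beta=*/0.0f, c, /*ldc=*/static_cast<int>(m));
}
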
@@ -3676,7 +3644,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
 
   ThenBlasImpl<blas::Transpose, blas::Transpose, uint64, uint64, uint64, float,
                const DeviceMemory<float> &, int, const DeviceMemory<float> &,
-               int, float, DeviceMemory<float> *, int> impl;
+               int, float, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k,
               alpha, a, lda, b, ldb, beta, c, ldc);
 }
@@ -3692,7 +3661,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
 
   ThenBlasImpl<blas::Transpose, blas::Transpose, uint64, uint64, uint64, double,
                const DeviceMemory<double> &, int, const DeviceMemory<double> &,
-               int, double, DeviceMemory<double> *, int> impl;
+               int, double, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k,
               alpha, a, lda, b, ldb, beta, c, ldc);
 }
@@ -3712,8 +3682,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
   ThenBlasImpl<blas::Transpose, blas::Transpose, uint64, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k,
               alpha, a, lda, b, ldb, beta, c, ldc);
 }
@@ -3733,8 +3703,8 @@ Stream &Stream::ThenBlasGemm(blas::Transpose transa, blas::Transpose transb,
   ThenBlasImpl<blas::Transpose, blas::Transpose, uint64, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasGemm, transa, transb, m, n, k,
               alpha, a, lda, b, ldb, beta, c, ldc);
 }
@@ -4100,8 +4070,8 @@ Stream &Stream::ThenBlasHemm(blas::Side side, blas::UpperLower uplo, uint64 m,
   ThenBlasImpl<blas::Side, blas::UpperLower, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHemm, side, uplo, m, n, alpha, a,
               lda, b, ldb, beta, c, ldc);
 }
@@ -4120,8 +4090,8 @@ Stream &Stream::ThenBlasHemm(blas::Side side, blas::UpperLower uplo, uint64 m,
   ThenBlasImpl<blas::Side, blas::UpperLower, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHemm, side, uplo, m, n, alpha, a,
               lda, b, ldb, beta, c, ldc);
 }
@@ -4136,7 +4106,8 @@ Stream &Stream::ThenBlasHerk(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64, float,
                const DeviceMemory<std::complex<float>> &, int, float,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHerk, uplo, trans, n, k, alpha, a,
               lda, beta, c, ldc);
 }
@@ -4151,7 +4122,8 @@ Stream &Stream::ThenBlasHerk(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64, double,
                const DeviceMemory<std::complex<double>> &, int, double,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHerk, uplo, trans, n, k, alpha, a,
               lda, beta, c, ldc);
 }
@@ -4170,7 +4142,8 @@ Stream &Stream::ThenBlasHer2k(blas::UpperLower uplo, blas::Transpose trans,
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, const DeviceMemory<std::complex<float>> &, int, float,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHer2k, uplo, trans, n, k, alpha,
               a, lda, b, ldb, beta, c, ldc);
 }
@@ -4189,7 +4162,8 @@ Stream &Stream::ThenBlasHer2k(blas::UpperLower uplo, blas::Transpose trans,
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, const DeviceMemory<std::complex<double>> &, int, double,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasHer2k, uplo, trans, n, k, alpha,
               a, lda, b, ldb, beta, c, ldc);
 }
@@ -4205,7 +4179,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m,
 
   ThenBlasImpl<blas::Side, blas::UpperLower, uint64, uint64, float,
                const DeviceMemory<float> &, int, const DeviceMemory<float> &,
-               int, float, DeviceMemory<float> *, int> impl;
+               int, float, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a,
               lda, b, ldb, beta, c, ldc);
 }
@@ -4221,7 +4196,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m,
 
   ThenBlasImpl<blas::Side, blas::UpperLower, uint64, uint64, double,
                const DeviceMemory<double> &, int, const DeviceMemory<double> &,
-               int, double, DeviceMemory<double> *, int> impl;
+               int, double, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a,
               lda, b, ldb, beta, c, ldc);
 }
@@ -4240,8 +4216,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m,
   ThenBlasImpl<blas::Side, blas::UpperLower, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a,
               lda, b, ldb, beta, c, ldc);
 }
@@ -4260,8 +4236,8 @@ Stream &Stream::ThenBlasSymm(blas::Side side, blas::UpperLower uplo, uint64 m,
   ThenBlasImpl<blas::Side, blas::UpperLower, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSymm, side, uplo, m, n, alpha, a,
               lda, b, ldb, beta, c, ldc);
 }
@@ -4275,7 +4251,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64, float,
                const DeviceMemory<float> &, int, float, DeviceMemory<float> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a,
               lda, beta, c, ldc);
 }
@@ -4289,7 +4266,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64, double,
                const DeviceMemory<double> &, int, double,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a,
               lda, beta, c, ldc);
 }
@@ -4305,7 +4283,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans,
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a,
               lda, beta, c, ldc);
 }
@@ -4321,7 +4300,8 @@ Stream &Stream::ThenBlasSyrk(blas::UpperLower uplo, blas::Transpose trans,
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyrk, uplo, trans, n, k, alpha, a,
               lda, beta, c, ldc);
 }
@@ -4337,7 +4317,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64, float,
                const DeviceMemory<float> &, int, const DeviceMemory<float> &,
-               int, float, DeviceMemory<float> *, int> impl;
+               int, float, DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha,
               a, lda, b, ldb, beta, c, ldc);
 }
@@ -4353,7 +4334,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans,
 
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64, double,
                const DeviceMemory<double> &, int, const DeviceMemory<double> &,
-               int, double, DeviceMemory<double> *, int> impl;
+               int, double, DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha,
               a, lda, b, ldb, beta, c, ldc);
 }
@@ -4372,8 +4354,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans,
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64,
                std::complex<float>, const DeviceMemory<std::complex<float>> &,
                int, const DeviceMemory<std::complex<float>> &, int,
-               std::complex<float>, DeviceMemory<std::complex<float>> *,
-               int> impl;
+               std::complex<float>, DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha,
               a, lda, b, ldb, beta, c, ldc);
 }
@@ -4392,8 +4374,8 @@ Stream &Stream::ThenBlasSyr2k(blas::UpperLower uplo, blas::Transpose trans,
   ThenBlasImpl<blas::UpperLower, blas::Transpose, uint64, uint64,
                std::complex<double>, const DeviceMemory<std::complex<double>> &,
                int, const DeviceMemory<std::complex<double>> &, int,
-               std::complex<double>, DeviceMemory<std::complex<double>> *,
-               int> impl;
+               std::complex<double>, DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasSyr2k, uplo, trans, n, k, alpha,
               a, lda, b, ldb, beta, c, ldc);
 }
@@ -4408,7 +4390,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo,
 
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, float, const DeviceMemory<float> &, int,
-               DeviceMemory<float> *, int> impl;
+               DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4423,7 +4406,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo,
 
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, double, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4440,7 +4424,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo,
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4457,7 +4442,8 @@ Stream &Stream::ThenBlasTrmm(blas::Side side, blas::UpperLower uplo,
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrmm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4472,7 +4458,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo,
 
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, float, const DeviceMemory<float> &, int,
-               DeviceMemory<float> *, int> impl;
+               DeviceMemory<float> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4487,7 +4474,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo,
 
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, double, const DeviceMemory<double> &, int,
-               DeviceMemory<double> *, int> impl;
+               DeviceMemory<double> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4504,7 +4492,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo,
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, std::complex<float>,
                const DeviceMemory<std::complex<float>> &, int,
-               DeviceMemory<std::complex<float>> *, int> impl;
+               DeviceMemory<std::complex<float>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4521,7 +4510,8 @@ Stream &Stream::ThenBlasTrsm(blas::Side side, blas::UpperLower uplo,
   ThenBlasImpl<blas::Side, blas::UpperLower, blas::Transpose, blas::Diagonal,
                uint64, uint64, std::complex<double>,
                const DeviceMemory<std::complex<double>> &, int,
-               DeviceMemory<std::complex<double>> *, int> impl;
+               DeviceMemory<std::complex<double>> *, int>
+      impl;
   return impl(this, &blas::BlasSupport::DoBlasTrsm, side, uplo, transa, diag, m,
               n, alpha, a, lda, b, ldb);
 }
@@ -4814,17 +4804,11 @@ Stream &Stream::ThenBlasGemmStridedBatched(
 Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) {
   VLOG_CALL(PARAM(seed), PARAM(seed_bytes));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->SetSeed(this, seed, seed_bytes));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers() << " unable to initialize RNG";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->SetSeed(this, seed, seed_bytes));
   } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not set RNG seed: " << static_cast<const void *>(seed)
-              << "; bytes: " << seed_bytes;
+    SetError();
+    LOG(INFO) << DebugStreamPointers() << " unable to initialize RNG";
   }
   return *this;
 }
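
From this hunk onward the diff drops the `if (ok())` pre-checks that used to skip work on an already-errored stream. One plausible reading of why that is safe: the stream's error state latches, so re-running CheckError on a failed call leaves an errored stream errored rather than corrupting anything. A simplified, self-contained model of that contract (assumed for illustration; the real members are non-public):

// Simplified model of Stream's error folding, assumed for illustration.
class StreamStatusSketch {
 public:
  bool ok() const { return ok_; }
  // A false retcode latches the error; later successes cannot clear it,
  // which makes per-call ok() guards redundant rather than required.
  void CheckError(bool operation_retcode) {
    if (!operation_retcode) ok_ = false;
  }

 private:
  bool ok_ = true;
};
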
@@ -4832,15 +4816,13 @@ Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) {
 Stream &Stream::ThenPopulateRandUniform(DeviceMemory<float> *values) {
   VLOG_CALL(PARAM(values));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->DoPopulateRandUniform(this, values));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform RNG operation using StreamExecutor"
-                   " without RNG support.";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->DoPopulateRandUniform(this, values));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform RNG operation using StreamExecutor"
+                 " without RNG support.";
   }
   return *this;
 }
@@ -4849,15 +4831,13 @@ Stream &Stream::ThenPopulateRandGaussian(float mean, float sd,
                                          DeviceMemory<float> *values) {
   VLOG_CALL(PARAM(mean), PARAM(sd), PARAM(values));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform RNG operation using StreamExecutor"
-                   " without RNG support.";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform RNG operation using StreamExecutor"
+                 " without RNG support.";
   }
   return *this;
 }
@@ -4866,15 +4846,13 @@ Stream &Stream::ThenPopulateRandGaussian(double mean, double sd,
                                          DeviceMemory<double> *values) {
   VLOG_CALL(PARAM(mean), PARAM(sd), PARAM(values));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform RNG operation using StreamExecutor"
-                   " without RNG support.";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->DoPopulateRandGaussian(this, mean, sd, values));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform RNG operation using StreamExecutor"
+                 " without RNG support.";
   }
   return *this;
 }
@@ -4882,15 +4860,13 @@ Stream &Stream::ThenPopulateRandGaussian(double mean, double sd,
 Stream &Stream::ThenPopulateRandUniform(DeviceMemory<double> *values) {
   VLOG_CALL(PARAM(values));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->DoPopulateRandUniform(this, values));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform RNG operation using StreamExecutor"
-                   " without RNG support.";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->DoPopulateRandUniform(this, values));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform RNG operation using StreamExecutor"
+                 " without RNG support.";
   }
   return *this;
 }
@@ -4899,15 +4875,13 @@ Stream &Stream::ThenPopulateRandUniform(
     DeviceMemory<std::complex<float>> *values) {
   VLOG_CALL(PARAM(values));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->DoPopulateRandUniform(this, values));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform RNG operation using StreamExecutor"
-                   " without RNG support.";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->DoPopulateRandUniform(this, values));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform RNG operation using StreamExecutor"
+                 " without RNG support.";
   }
   return *this;
 }
@@ -4916,15 +4890,13 @@ Stream &Stream::ThenPopulateRandUniform(
     DeviceMemory<std::complex<double>> *values) {
   VLOG_CALL(PARAM(values));
 
-  if (ok()) {
-    if (rng::RngSupport *rng = parent_->AsRng()) {
-      CheckError(rng->DoPopulateRandUniform(this, values));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform RNG operation using StreamExecutor"
-                   " without RNG support.";
-    }
+  if (rng::RngSupport *rng = parent_->AsRng()) {
+    CheckError(rng->DoPopulateRandUniform(this, values));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform RNG operation using StreamExecutor"
+                 " without RNG support.";
   }
   return *this;
 }
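
A hypothetical use of the RNG entry points simplified above: seed the generator, then fill a device buffer with uniform floats. With the guards gone, a missing RNG backend is still surfaced, via SetError inside each call rather than a skipped invocation.

// Illustrative only: chain the public RNG calls; any failure is reflected in
// the stream's status.
void FillUniform(Stream *stream, const uint8 *seed, uint64 seed_bytes,
                 DeviceMemory<float> *values) {
  stream->ThenSetRngSeed(seed, seed_bytes).ThenPopulateRandUniform(values);
}
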
@@ -4933,12 +4905,7 @@ Stream &Stream::ThenMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src,
                            uint64 size) {
   VLOG_CALL(PARAM(host_dst), PARAM(gpu_src), PARAM(size));
 
-  if (ok()) {
-    CheckError(parent_->Memcpy(this, host_dst, gpu_src, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memcpy device-to-host; source: " << gpu_src.opaque();
-  }
+  CheckError(parent_->Memcpy(this, host_dst, gpu_src, size));
   return *this;
 }
 
@@ -4946,12 +4913,7 @@ Stream &Stream::ThenMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src,
                            uint64 size) {
   VLOG_CALL(PARAM(gpu_dst), PARAM(host_src), PARAM(size));
 
-  if (ok()) {
-    CheckError(parent_->Memcpy(this, gpu_dst, host_src, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memcpy host-to-device; source: " << host_src;
-  }
+  CheckError(parent_->Memcpy(this, gpu_dst, host_src, size));
   return *this;
 }
 
@@ -4959,24 +4921,14 @@ Stream &Stream::ThenMemcpy(DeviceMemoryBase *gpu_dst,
                            const DeviceMemoryBase &gpu_src, uint64 size) {
   VLOG_CALL(PARAM(gpu_dst), PARAM(gpu_src), PARAM(size));
 
-  if (ok()) {
-    CheckError(parent_->MemcpyDeviceToDevice(this, gpu_dst, gpu_src, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memcpy gpu-to-gpu; source: " << &gpu_src;
-  }
+  CheckError(parent_->MemcpyDeviceToDevice(this, gpu_dst, gpu_src, size));
   return *this;
 }
 
 Stream &Stream::ThenMemZero(DeviceMemoryBase *location, uint64 size) {
   VLOG_CALL(PARAM(location), PARAM(size));
 
-  if (ok()) {
-    CheckStatus(parent_->MemZero(this, location, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memzero GPU location; source: " << location;
-  }
+  CheckStatus(parent_->MemZero(this, location, size));
   return *this;
 }
 
@@ -4984,13 +4936,7 @@ Stream &Stream::ThenMemset32(DeviceMemoryBase *location, uint32 pattern,
                              uint64 size) {
   VLOG_CALL(PARAM(location), PARAM(pattern), PARAM(size));
 
-  if (ok()) {
-    CheckStatus(parent_->Memset32(this, location, pattern, size));
-  } else {
-    LOG(INFO) << DebugStreamPointers()
-              << " did not memset GPU location; source: " << location
-              << "; size: " << size << "; pattern: " << std::hex << pattern;
-  }
+  CheckStatus(parent_->Memset32(this, location, pattern, size));
   return *this;
 }
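
A hypothetical round trip through the now-unguarded data-movement entry points. The behavioral shift these hunks make is that failure is observed once, at synchronization time, instead of through per-call skip logging on an errored stream.

// Illustrative only: copy host data in, zero a scratch buffer, copy the
// result back, then surface any accumulated error as a port::Status.
port::Status RoundTrip(Stream *stream, DeviceMemoryBase *device_buf,
                       DeviceMemoryBase *scratch, void *host_buf,
                       uint64 size) {
  stream->ThenMemcpy(device_buf, host_buf, size)
      .ThenMemZero(scratch, size)
      .ThenMemcpy(host_buf, *device_buf, size);
  return stream->BlockHostUntilDone();
}
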
 
@@ -5013,20 +4959,17 @@ Stream &Stream::ThenRnnForward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnForward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          is_training, reserve_space_allocator, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnForward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data, is_training,
+        reserve_space_allocator, workspace_allocator, output_profile_result);
+    if (!status && !output_profile_result) {
+      SetError();
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5049,20 +4992,17 @@ Stream &Stream::ThenRnnForward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnForward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          is_training, reserve_space_allocator, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnForward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data, is_training,
+        reserve_space_allocator, workspace_allocator, output_profile_result);
+    if (!status && !output_profile_result) {
+      SetError();
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5086,20 +5026,17 @@ Stream &Stream::ThenRnnForward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnForward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          is_training, reserve_space_allocator, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnForward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data, is_training,
+        reserve_space_allocator, workspace_allocator, output_profile_result);
+    if (!status && !output_profile_result) {
+      SetError();
     }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
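
The `!status && !output_profile_result` check that survives these RNN hunks encodes a policy worth stating once: during autotuning, individual algorithms are allowed to fail. A free-standing sketch of that policy follows; the helper name is invented, and its use of CheckError stands in for the protected SetError the real code calls.

// Hypothetical helper: a launch failure only poisons the stream when the
// caller was not profiling; autotuning probes report failure through the
// unpopulated ProfileResult instead and leave the stream usable for the
// next candidate algorithm.
void FoldDnnLaunchResult(Stream *stream, bool launched,
                         dnn::ProfileResult *output_profile_result) {
  if (!launched && output_profile_result == nullptr) {
    stream->CheckError(false);  // stands in for the protected SetError()
  }
}
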
@@ -5130,23 +5067,21 @@ Stream &Stream::ThenRnnBackward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnBackward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          output_backprop_data, output_h_backprop_data, output_c_backprop_data,
-          input_backprop_data, input_h_backprop_data, input_c_backprop_data,
-          params_backprop_data, reserve_space_data, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnBackward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data,
+        output_backprop_data, output_h_backprop_data, output_c_backprop_data,
+        input_backprop_data, input_h_backprop_data, input_c_backprop_data,
+        params_backprop_data, reserve_space_data, workspace_allocator,
+        output_profile_result);
+    if (!status && !output_profile_result) {
       SetError();
-      LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
     }
+  } else {
+    SetError();
+    LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
   }
   return *this;
 }
@@ -5176,23 +5111,21 @@ Stream &Stream::ThenRnnBackward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnBackward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          output_backprop_data, output_h_backprop_data, output_c_backprop_data,
-          input_backprop_data, input_h_backprop_data, input_c_backprop_data,
-          params_backprop_data, reserve_space_data, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnBackward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data,
+        output_backprop_data, output_h_backprop_data, output_c_backprop_data,
+        input_backprop_data, input_h_backprop_data, input_c_backprop_data,
+        params_backprop_data, reserve_space_data, workspace_allocator,
+        output_profile_result);
+    if (!status && !output_profile_result) {
       SetError();
-      LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
     }
+  } else {
+    SetError();
+    LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
   }
   return *this;
 }
@@ -5223,23 +5156,21 @@ Stream &Stream::ThenRnnBackward(
     ScratchAllocator *workspace_allocator,
     dnn::ProfileResult *output_profile_result) {
   // TODO(zhengxq): add VLOG PARAM calls.
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      auto status = dnn->DoRnnBackward(
-          this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
-          input_c_desc, input_c_data, params, output_desc, output_data,
-          output_h_desc, output_h_data, output_c_desc, output_c_data,
-          output_backprop_data, output_h_backprop_data, output_c_backprop_data,
-          input_backprop_data, input_h_backprop_data, input_c_backprop_data,
-          params_backprop_data, reserve_space_data, workspace_allocator,
-          output_profile_result);
-      if (!status && !output_profile_result) {
-        SetError();
-      }
-    } else {
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    auto status = dnn->DoRnnBackward(
+        this, rnn_desc, input_desc, input_data, input_h_desc, input_h_data,
+        input_c_desc, input_c_data, params, output_desc, output_data,
+        output_h_desc, output_h_data, output_c_desc, output_c_data,
+        output_backprop_data, output_h_backprop_data, output_c_backprop_data,
+        input_backprop_data, input_h_backprop_data, input_c_backprop_data,
+        params_backprop_data, reserve_space_data, workspace_allocator,
+        output_profile_result);
+    if (!status && !output_profile_result) {
       SetError();
-      LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
     }
+  } else {
+    SetError();
+    LOG(WARNING) << "Attempting to call ThenRnnBackward without DNN support";
   }
   return *this;
 }
@@ -5253,28 +5184,26 @@ Stream &Stream::ThenCtcLoss(const dnn::RnnStateTensorDescriptor &probs_desc,
                             const dnn::RnnStateTensorDescriptor &grads_desc,
                             DeviceMemory<float> *grads_data,
                             ScratchAllocator *workspace_allocator) {
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      int ctc_loss_algo_id;
-      auto status =
-          dnn->PrepareForCtcLoss(this, probs_desc, probs_data, grads_desc,
-                                 labels_data, labels_lengths_data,
-                                 input_lengths_data, workspace_allocator,
-                                 &scratch_memory, &ctc_loss_algo_id)
-              .ok();
-      if (status) {
-        status = dnn->DoCtcLoss(this, probs_desc, probs_data, labels_data,
-                                labels_lengths_data, input_lengths_data,
-                                costs_data, grads_desc, grads_data,
-                                &scratch_memory, ctc_loss_algo_id);
-      }
-      if (!status) {
-        SetError();
-      }
-    } else {
-      SetErrorAndLogNoDnnSupport();
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    DeviceMemory<uint8> scratch_memory;
+    int ctc_loss_algo_id;
+    auto status =
+        dnn->PrepareForCtcLoss(this, probs_desc, probs_data, grads_desc,
+                               labels_data, labels_lengths_data,
+                               input_lengths_data, workspace_allocator,
+                               &scratch_memory, &ctc_loss_algo_id)
+            .ok();
+    if (status) {
+      status = dnn->DoCtcLoss(this, probs_desc, probs_data, labels_data,
+                              labels_lengths_data, input_lengths_data,
+                              costs_data, grads_desc, grads_data,
+                              &scratch_memory, ctc_loss_algo_id);
     }
+    if (!status) {
+      SetError();
+    }
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5288,14 +5217,12 @@ Stream &Stream::ThenTransformTensor(const dnn::BatchDescriptor &input_desc,
   VLOG_CALL(PARAM(input_desc), PARAM(input_type), PARAM(input_data),
             PARAM(output_desc), PARAM(output_type), PARAM(scale),
             PARAM(output_data));
-  if (ok()) {
-    if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      CheckError(dnn->DoTransformTensor(this, input_desc, input_type,
-                                        input_data, output_desc, output_type,
-                                        scale, output_data));
-    } else {
-      SetErrorAndLogNoDnnSupport();
-    }
+  if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
+    CheckError(dnn->DoTransformTensor(this, input_desc, input_type, input_data,
+                                      output_desc, output_type, scale,
+                                      output_data));
+  } else {
+    SetErrorAndLogNoDnnSupport();
   }
   return *this;
 }
@@ -5342,15 +5269,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<std::complex<float>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5360,15 +5285,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<std::complex<double>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5377,15 +5300,13 @@ Stream &Stream::ThenFft(fft::Plan *plan, const DeviceMemory<float> &input,
                         DeviceMemory<std::complex<float>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5394,15 +5315,13 @@ Stream &Stream::ThenFft(fft::Plan *plan, const DeviceMemory<double> &input,
                         DeviceMemory<std::complex<double>> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5412,15 +5331,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<float> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5430,15 +5347,13 @@ Stream &Stream::ThenFft(fft::Plan *plan,
                         DeviceMemory<double> *output) {
   VLOG_CALL(PARAM(plan), PARAM(input), PARAM(output));
 
-  if (ok()) {
-    if (fft::FftSupport *fft = parent_->AsFft()) {
-      CheckError(fft->DoFft(this, plan, input, output));
-    } else {
-      SetError();
-      LOG(INFO) << DebugStreamPointers()
-                << " attempting to perform FFT operation using StreamExecutor"
-                   " without FFT support";
-    }
+  if (fft::FftSupport *fft = parent_->AsFft()) {
+    CheckError(fft->DoFft(this, plan, input, output));
+  } else {
+    SetError();
+    LOG(INFO) << DebugStreamPointers()
+              << " attempting to perform FFT operation using StreamExecutor"
+                 " without FFT support";
   }
   return *this;
 }
@@ -5500,7 +5415,7 @@ void Stream::CheckStatus(port::Status status) {
   }
   LOG(ERROR) << status;
   absl::MutexLock lock(&mu_);
-  ok_ = false;
+  status_ = status;
 }
 
 }  // namespace stream_executor
diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h
index bf727d63da2..bfe442641ad 100644
--- a/tensorflow/stream_executor/stream.h
+++ b/tensorflow/stream_executor/stream.h
@@ -2026,7 +2026,7 @@ class Stream {
 
   bool InErrorState() const TF_LOCKS_EXCLUDED(mu_) {
     absl::ReaderMutexLock lock(&mu_);
-    return !ok_;
+    return !status_.ok();
   }
 
   // Sets the error state if operation_retcode is false.
@@ -2036,7 +2036,7 @@ class Stream {
       return;
     }
     absl::MutexLock lock(&mu_);
-    ok_ = false;
+    status_ = port::InternalError("Unknown error");
   }
 
   // Checks the status and logs the error message, if any.
@@ -2070,9 +2070,8 @@ class Stream {
   // See StreamExecutor::AllocateStream.
   bool allocated_ TF_GUARDED_BY(mu_);
 
-  // Whether all operations have entrained successfully to the current program
-  // point.
-  bool ok_ TF_GUARDED_BY(mu_);
+  // The last error (if any) of all method calls.
+  port::Status status_ TF_GUARDED_BY(mu_);
 
   // Sub-streams that are generated from this stream. Each element has a pointer
   // to sub-stream and a boolean value indicating if this substream is ready to