Roll forward the original CL.

PiperOrigin-RevId: 330857639
Change-Id: I1d6da52a031deb5962d472e800649f4d5d14c6d9
This commit is contained in:
Tim Shen 2020-09-09 20:51:11 -07:00 committed by TensorFlower Gardener
parent a9104043a8
commit f046d2f214
7 changed files with 1082 additions and 1181 deletions

View File

@@ -3703,7 +3703,7 @@ port::Status CudnnSupport::DoBatchNormalizationBackwardImpl(
return port::Status::OK();
}
bool CudnnSupport::DoFusedConvolve(
port::Status CudnnSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<double>& conv_input_data, double conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3716,18 +3716,16 @@ bool CudnnSupport::DoFusedConvolve(
DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return IsStatusOk(
DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor,
side_input_data, side_input_scale, bias_descriptor, biases,
activation_mode, output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kDouble), scratch_allocator,
algorithm_config, output_profile_result),
/*report_error=*/!output_profile_result);
return DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor, side_input_data,
side_input_scale, bias_descriptor, biases, activation_mode,
output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kDouble), scratch_allocator,
algorithm_config, output_profile_result);
}
bool CudnnSupport::DoFusedConvolve(
port::Status CudnnSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<float>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3740,18 +3738,16 @@ bool CudnnSupport::DoFusedConvolve(
DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return IsStatusOk(
DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor,
side_input_data, side_input_scale, bias_descriptor, biases,
activation_mode, output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kFloat), scratch_allocator,
algorithm_config, output_profile_result),
/*report_error=*/!output_profile_result);
return DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor, side_input_data,
side_input_scale, bias_descriptor, biases, activation_mode,
output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kFloat), scratch_allocator,
algorithm_config, output_profile_result);
}
bool CudnnSupport::DoFusedConvolve(
port::Status CudnnSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3765,18 +3761,16 @@ bool CudnnSupport::DoFusedConvolve(
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return IsStatusOk(
DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor,
side_input_data, side_input_scale, bias_descriptor, biases,
activation_mode, output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kHalf), scratch_allocator,
algorithm_config, output_profile_result),
/*report_error=*/!output_profile_result);
return DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor, side_input_data,
side_input_scale, bias_descriptor, biases, activation_mode,
output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kHalf), scratch_allocator,
algorithm_config, output_profile_result);
}
bool CudnnSupport::DoFusedConvolve(
port::Status CudnnSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3793,23 +3787,21 @@ bool CudnnSupport::DoFusedConvolve(
std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream);
if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) {
LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only "
"supported on GPUs with compute capability 6.1 or later.";
return false;
return port::UnimplementedError(
"cudnnConvolutionBiasActivationForward() for int8 is only supported on "
"GPUs with compute capability 6.1 or later.");
}
return IsStatusOk(
DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor,
side_input_data, side_input_scale, bias_descriptor, biases,
activation_mode, output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
algorithm_config, output_profile_result),
/*report_error=*/!output_profile_result);
return DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor, side_input_data,
side_input_scale, bias_descriptor, biases, activation_mode,
output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
algorithm_config, output_profile_result);
}
bool CudnnSupport::DoFusedConvolve(
port::Status CudnnSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3826,20 +3818,18 @@ bool CudnnSupport::DoFusedConvolve(
stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
&cc_minor);
if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) {
LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only "
"supported on GPUs with compute capability 6.1 or later.";
return false;
return port::UnimplementedError(
"cudnnConvolutionBiasActivationForward() for int8 is only supported on "
"GPUs with compute capability 6.1 or later.");
}
return IsStatusOk(
DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor,
side_input_data, side_input_scale, bias_descriptor, biases,
activation_mode, output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
algorithm_config, output_profile_result),
/*report_error=*/!output_profile_result);
return DoFusedConvolveImpl(
stream, conv_input_descriptor, conv_input_data, conv_input_scale,
filter_descriptor, filter_data, convolution_descriptor, side_input_data,
side_input_scale, bias_descriptor, biases, activation_mode,
output_descriptor, output_data,
GetConvAccumulatorType(dnn::DataType::kInt8), scratch_allocator,
algorithm_config, output_profile_result);
}
port::Status CudnnSupport::DoPrepareForCtcLoss(

View File

@@ -277,7 +277,7 @@ class CudnnSupport : public dnn::DnnSupport {
dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<double>& conv_input_data, double conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -291,7 +291,7 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<float>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -305,25 +305,23 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(Stream* stream,
const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data,
float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& side_input_data,
float side_input_scale,
const dnn::BatchDescriptor& bias_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale,
const dnn::BatchDescriptor& bias_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -337,7 +335,7 @@ class CudnnSupport : public dnn::DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,

View File

@@ -1163,7 +1163,7 @@ class DnnSupport {
// that if the inverse of the filter is applied to the output in VALID mode
// the result is the same size as the input - this requires even more
// padding of the input.
virtual bool DoFusedConvolve(
virtual port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<double>& conv_input_data, double conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -1176,11 +1176,12 @@ class DnnSupport {
DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return false;
return port::UnimplementedError(
"DnnSupport::DoFusedConvolve not implemented on this platform.");
}
// This is the float version of DoFusedConvolve.
virtual bool DoFusedConvolve(
virtual port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<float>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -1193,12 +1194,13 @@ class DnnSupport {
DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return false;
return port::UnimplementedError(
"DnnSupport::DoFusedConvolve not implemented on this platform.");
}
// This is the Eigen::half version of DoFusedConvolve.
// The scaling parameters are still floats.
virtual bool DoFusedConvolve(
virtual port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -1213,12 +1215,13 @@ class DnnSupport {
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return false;
return port::UnimplementedError(
"DnnSupport::DoFusedConvolve not implemented on this platform.");
}
// This is the int8 version of DoFusedConvolve.
// The bias input and scaling parameters are floats.
virtual bool DoFusedConvolve(
virtual port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -1231,12 +1234,13 @@ class DnnSupport {
DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
return false;
return port::UnimplementedError(
"DnnSupport::DoFusedConvolve not implemented on this platform.");
}
// This is the int8 version of DoFusedConvolve.
// The output, bias input and scaling parameters are floats.
virtual bool DoFusedConvolve(
virtual port::Status DoFusedConvolve(
Stream* /*stream*/, const dnn::BatchDescriptor& /*conv_input_descriptor*/,
const DeviceMemory<int8>& /*conv_input_data*/, float /*conv_input_scale*/,
const dnn::FilterDescriptor& /*filter_descriptor*/,
@@ -1252,7 +1256,8 @@ class DnnSupport {
ScratchAllocator* /*scratch_allocator*/,
const dnn::AlgorithmConfig& /*algorithm_config*/,
dnn::ProfileResult* /*output_profile_result*/) {
return false;
return port::UnimplementedError(
"DnnSupport::DoFusedConvolve not implemented on this platform.");
}
template <typename ElementType, typename OutputType>

View File

@@ -3680,7 +3680,7 @@ bool MIOpenSupport::DoBatchNormalizationBackwardImpl(
return true;
}
bool MIOpenSupport::DoFusedConvolve(
port::Status MIOpenSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<double>& conv_input_data, double conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3693,11 +3693,10 @@ bool MIOpenSupport::DoFusedConvolve(
DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
LOG(ERROR) << "fused convolve not implemented yet";
return false;
return port::UnimplementedError("fused convolve not implemented yet");
}
bool MIOpenSupport::DoFusedConvolve(
port::Status MIOpenSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<float>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3710,11 +3709,10 @@ bool MIOpenSupport::DoFusedConvolve(
DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
LOG(ERROR) << "fused convolve not implemented yet";
return false;
return port::UnimplementedError("fused convolve not implemented yet");
}
bool MIOpenSupport::DoFusedConvolve(
port::Status MIOpenSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3728,11 +3726,10 @@ bool MIOpenSupport::DoFusedConvolve(
DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
LOG(ERROR) << "fused convolve not implemented yet";
return false;
return port::UnimplementedError("fused convolve not implemented yet");
}
bool MIOpenSupport::DoFusedConvolve(
port::Status MIOpenSupport::DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -3745,8 +3742,7 @@ bool MIOpenSupport::DoFusedConvolve(
DeviceMemory<int8>* output_data, ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) {
LOG(ERROR) << "fused convolve not implemented yet";
return false;
return port::UnimplementedError("fused convolve not implemented yet");
}
bool MIOpenSupport::DoTransformTensor(Stream* stream,

View File

@@ -315,7 +315,7 @@ class MIOpenSupport : public dnn::DnnSupport {
dnn::AlgorithmDesc algorithm_desc, DeviceMemory<uint8> scratch_memory,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<double>& conv_input_data, double conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -329,7 +329,7 @@ class MIOpenSupport : public dnn::DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<float>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
@@ -343,25 +343,23 @@ class MIOpenSupport : public dnn::DnnSupport {
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(Stream* stream,
const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data,
float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& side_input_data,
float side_input_scale,
const dnn::BatchDescriptor& bias_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<Eigen::half>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,
const DeviceMemory<Eigen::half>& filter_data,
const dnn::ConvolutionDescriptor& convolution_descriptor,
const DeviceMemory<Eigen::half>& side_input_data, float side_input_scale,
const dnn::BatchDescriptor& bias_descriptor,
const DeviceMemory<Eigen::half>& biases,
dnn::ActivationMode activation_mode,
const dnn::BatchDescriptor& output_descriptor,
DeviceMemory<Eigen::half>* output_data,
ScratchAllocator* scratch_allocator,
const dnn::AlgorithmConfig& algorithm_config,
dnn::ProfileResult* output_profile_result) override;
bool DoFusedConvolve(
port::Status DoFusedConvolve(
Stream* stream, const dnn::BatchDescriptor& conv_input_descriptor,
const DeviceMemory<int8>& conv_input_data, float conv_input_scale,
const dnn::FilterDescriptor& filter_descriptor,

File diff suppressed because it is too large Load Diff

View File

@@ -2026,7 +2026,7 @@ class Stream {
bool InErrorState() const TF_LOCKS_EXCLUDED(mu_) {
absl::ReaderMutexLock lock(&mu_);
return !ok_;
return !status_.ok();
}
// Sets the error state if operation_retcode is false.
@@ -2036,7 +2036,7 @@ class Stream {
return;
}
absl::MutexLock lock(&mu_);
ok_ = false;
status_ = port::InternalError("Unknown error");
}
// Checks the status and logs the error message, if any.
@@ -2070,9 +2070,8 @@ class Stream {
// See StreamExecutor::AllocateStream.
bool allocated_ TF_GUARDED_BY(mu_);
// Whether all operations have entrained successfully to the current program
// point.
bool ok_ TF_GUARDED_BY(mu_);
// The last error (if any) of all method calls.
port::Status status_ TF_GUARDED_BY(mu_);
// Sub-streams that are generated from this stream. Each element has a pointer
// to sub-stream and a boolean value indicating if this substream is ready to