StreamExecutor add CUDA support for cudnnConvolutionBackwardBias
Change: 123233121
This commit is contained in:
parent
8515a76345
commit
a9f3979264
@ -184,6 +184,7 @@ bool IsCudnnR2() {
|
|||||||
__macro(cudnnSetStream) \
|
__macro(cudnnSetStream) \
|
||||||
__macro(cudnnActivationForward) \
|
__macro(cudnnActivationForward) \
|
||||||
__macro(cudnnConvolutionForward) \
|
__macro(cudnnConvolutionForward) \
|
||||||
|
__macro(cudnnConvolutionBackwardBias) \
|
||||||
__macro(cudnnGetConvolutionForwardWorkspaceSize) \
|
__macro(cudnnGetConvolutionForwardWorkspaceSize) \
|
||||||
__macro(cudnnTransformTensor) \
|
__macro(cudnnTransformTensor) \
|
||||||
__macro(cudnnSetConvolutionNdDescriptor) \
|
__macro(cudnnSetConvolutionNdDescriptor) \
|
||||||
@ -1493,6 +1494,72 @@ bool CudnnSupport::DoConvolveBackwardFilter(
|
|||||||
algorithm, output_profile_result);
|
algorithm, output_profile_result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
bool CudnnSupport::DoConvolveBackwardBiasImpl(
|
||||||
|
Stream* stream, int cudnn_type, // Actually cudnnDataType_t.
|
||||||
|
const dnn::BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<T>& input_data,
|
||||||
|
const dnn::BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<T>* backward_bias_data) {
|
||||||
|
mutex_lock lock{dnn_handle_mutex_};
|
||||||
|
auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_),
|
||||||
|
AsCUDAStreamValue(stream));
|
||||||
|
if (status != CUDNN_STATUS_SUCCESS) {
|
||||||
|
LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
ScopedTensorDescriptor input_nd{parent_, input_descriptor,
|
||||||
|
static_cast<cudnnDataType_t>(cudnn_type)};
|
||||||
|
ScopedTensorDescriptor bias_nd{parent_, bias_descriptor,
|
||||||
|
static_cast<cudnnDataType_t>(cudnn_type)};
|
||||||
|
|
||||||
|
// Alpha is the scaling factor for input.
|
||||||
|
float alpha = 1.0;
|
||||||
|
// Beta is the scaling factor for output.
|
||||||
|
float beta = 0.0;
|
||||||
|
|
||||||
|
status = dynload::cudnnConvolutionBackwardBias(
|
||||||
|
parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(),
|
||||||
|
input_data.opaque(), &beta, bias_nd.handle(),
|
||||||
|
backward_bias_data->opaque());
|
||||||
|
if (status != CUDNN_STATUS_SUCCESS) {
|
||||||
|
LOG(FATAL) << "failed to enqueue backward convolution on stream: "
|
||||||
|
<< ToString(status);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CudnnSupport::DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<double>& input_data,
|
||||||
|
const BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<double>* backward_bias_data) {
|
||||||
|
return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_DOUBLE, input_descriptor,
|
||||||
|
input_data, bias_descriptor,
|
||||||
|
backward_bias_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CudnnSupport::DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<float>& input_data,
|
||||||
|
const BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<float>* backward_bias_data) {
|
||||||
|
return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_FLOAT, input_descriptor,
|
||||||
|
input_data, bias_descriptor,
|
||||||
|
backward_bias_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CudnnSupport::DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<Eigen::half>& input_data,
|
||||||
|
const BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<Eigen::half>* backward_bias_data) {
|
||||||
|
return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_HALF, input_descriptor,
|
||||||
|
input_data, bias_descriptor,
|
||||||
|
backward_bias_data);
|
||||||
|
}
|
||||||
|
|
||||||
bool CudnnSupport::DoMatMul(Stream* stream,
|
bool CudnnSupport::DoMatMul(Stream* stream,
|
||||||
const DeviceMemory<float>& input_data,
|
const DeviceMemory<float>& input_data,
|
||||||
const DeviceMemory<float>& weights,
|
const DeviceMemory<float>& weights,
|
||||||
|
@ -140,6 +140,24 @@ class CudnnSupport : public dnn::DnnSupport {
|
|||||||
ScratchAllocator* scratch_allocator, dnn::AlgorithmType algorithm,
|
ScratchAllocator* scratch_allocator, dnn::AlgorithmType algorithm,
|
||||||
dnn::ProfileResult* output_profile_result) override;
|
dnn::ProfileResult* output_profile_result) override;
|
||||||
|
|
||||||
|
bool DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<double>& input_data,
|
||||||
|
const dnn::BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<double>* backward_bias_data) override;
|
||||||
|
|
||||||
|
bool DoConvolveBackwardBias(Stream* stream,
|
||||||
|
const dnn::BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<float>& input_data,
|
||||||
|
const dnn::BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<float>* backward_bias_data) override;
|
||||||
|
|
||||||
|
bool DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<Eigen::half>& input_data,
|
||||||
|
const dnn::BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<Eigen::half>* backward_bias_data) override;
|
||||||
|
|
||||||
bool DoMatMul(Stream* stream, const DeviceMemory<float>& input_data,
|
bool DoMatMul(Stream* stream, const DeviceMemory<float>& input_data,
|
||||||
const DeviceMemory<float>& weights,
|
const DeviceMemory<float>& weights,
|
||||||
const dnn::BatchDescriptor& input_dimensions,
|
const dnn::BatchDescriptor& input_dimensions,
|
||||||
@ -311,6 +329,14 @@ class CudnnSupport : public dnn::DnnSupport {
|
|||||||
dnn::AlgorithmType algorithm,
|
dnn::AlgorithmType algorithm,
|
||||||
dnn::ProfileResult* output_profile_result);
|
dnn::ProfileResult* output_profile_result);
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
bool DoConvolveBackwardBiasImpl(Stream* stream,
|
||||||
|
int cudnn_type, // Actually cudnnDataType_t.
|
||||||
|
const dnn::BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<T>& input_data,
|
||||||
|
const dnn::BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<T>* backward_bias_data);
|
||||||
|
|
||||||
SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport);
|
SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -849,6 +849,43 @@ class DnnSupport {
|
|||||||
ScratchAllocator* scratch_allocator, AlgorithmType algorithm,
|
ScratchAllocator* scratch_allocator, AlgorithmType algorithm,
|
||||||
ProfileResult* output_profile_result) = 0;
|
ProfileResult* output_profile_result) = 0;
|
||||||
|
|
||||||
|
// Enqueues a single-precision backward convolution (for bias) operation onto
|
||||||
|
// the stream.
|
||||||
|
//
|
||||||
|
// Arguments:
|
||||||
|
// stream: borrowed pointer to the stream that the 'convolve' operation
|
||||||
|
// should be enqueued onto.
|
||||||
|
// input_descriptor: dimensions of the input layer.
|
||||||
|
// input_data: un-owned device memory region which contains the
|
||||||
|
// convolution input.
|
||||||
|
// bias_descriptor: dimensions of the bias tensor. Should be the same as the
|
||||||
|
// input dimensions, but with the spatial dimensions set to 1.
|
||||||
|
// backward_filter_data: un-owned device memory region in which to place the
|
||||||
|
// backprop of the bias.
|
||||||
|
virtual bool DoConvolveBackwardBias(Stream* stream,
|
||||||
|
const BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<float>& input_data,
|
||||||
|
const BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<float>* backward_bias_data) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<double>& input_data,
|
||||||
|
const BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<double>* backward_bias_data) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool DoConvolveBackwardBias(
|
||||||
|
Stream* stream, const BatchDescriptor& input_descriptor,
|
||||||
|
const DeviceMemory<Eigen::half>& input_data,
|
||||||
|
const BatchDescriptor& bias_descriptor,
|
||||||
|
DeviceMemory<Eigen::half>* backward_bias_data) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Fully connects the "nodes" (float values) in input_data with
|
// Fully connects the "nodes" (float values) in input_data with
|
||||||
// shape input_dimensions to output_data with output_dimensions
|
// shape input_dimensions to output_data with output_dimensions
|
||||||
// using provided weights. This is equivalent to computing a matrix
|
// using provided weights. This is equivalent to computing a matrix
|
||||||
|
@ -741,6 +741,57 @@ Stream &Stream::ThenConvolveBackwardFilter(
|
|||||||
/*scratch_allocator=*/nullptr);
|
/*scratch_allocator=*/nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
Stream &Stream::ThenConvolveBackwardBiasImpl(
|
||||||
|
const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<T> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<T> *backward_bias_data) {
|
||||||
|
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor),
|
||||||
|
PARAM(backward_bias_data));
|
||||||
|
|
||||||
|
if (ok()) {
|
||||||
|
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||||
|
CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data,
|
||||||
|
bias_descriptor,
|
||||||
|
backward_bias_data));
|
||||||
|
} else {
|
||||||
|
SetError();
|
||||||
|
LOG(WARNING)
|
||||||
|
<< "attempting to perform DNN operation using StreamExecutor "
|
||||||
|
"without DNN support";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
Stream &Stream::ThenConvolveBackwardBias(
|
||||||
|
const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<double> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<double> *backward_bias_data) {
|
||||||
|
return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
|
||||||
|
bias_descriptor, backward_bias_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
Stream &Stream::ThenConvolveBackwardBias(
|
||||||
|
const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<float> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<float> *backward_bias_data) {
|
||||||
|
return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
|
||||||
|
bias_descriptor, backward_bias_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
Stream &Stream::ThenConvolveBackwardBias(
|
||||||
|
const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<Eigen::half> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<Eigen::half> *backward_bias_data) {
|
||||||
|
return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
|
||||||
|
bias_descriptor, backward_bias_data);
|
||||||
|
}
|
||||||
|
|
||||||
Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
|
Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
|
||||||
const DeviceMemory<float> &weights,
|
const DeviceMemory<float> &weights,
|
||||||
const dnn::BatchDescriptor &input_dimensions,
|
const dnn::BatchDescriptor &input_dimensions,
|
||||||
|
@ -371,6 +371,22 @@ class Stream {
|
|||||||
ScratchAllocator *scratch_allocator, dnn::AlgorithmType algorithm,
|
ScratchAllocator *scratch_allocator, dnn::AlgorithmType algorithm,
|
||||||
dnn::ProfileResult *output_profile_result);
|
dnn::ProfileResult *output_profile_result);
|
||||||
|
|
||||||
|
Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<double> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<double> *backward_bias_data);
|
||||||
|
|
||||||
|
Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<float> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<float> *backward_bias_data);
|
||||||
|
|
||||||
|
Stream &ThenConvolveBackwardBias(
|
||||||
|
const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<Eigen::half> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<Eigen::half> *backward_bias_data);
|
||||||
|
|
||||||
Stream &ThenMatMul(const DeviceMemory<float> &input_data,
|
Stream &ThenMatMul(const DeviceMemory<float> &input_data,
|
||||||
const DeviceMemory<float> &weights,
|
const DeviceMemory<float> &weights,
|
||||||
const dnn::BatchDescriptor &input_dimensions,
|
const dnn::BatchDescriptor &input_dimensions,
|
||||||
@ -1439,6 +1455,14 @@ class Stream {
|
|||||||
// BlockHostUntilDone() is called.
|
// BlockHostUntilDone() is called.
|
||||||
internal::TemporaryMemoryManager temporary_memory_manager_;
|
internal::TemporaryMemoryManager temporary_memory_manager_;
|
||||||
|
|
||||||
|
// Implementation of ThenConvolveBackwardBias that is shared by all types.
|
||||||
|
template <typename T>
|
||||||
|
Stream &ThenConvolveBackwardBiasImpl(
|
||||||
|
const dnn::BatchDescriptor &input_descriptor,
|
||||||
|
const DeviceMemory<T> &input_data,
|
||||||
|
const dnn::BatchDescriptor &bias_descriptor,
|
||||||
|
DeviceMemory<T> *backward_bias_data);
|
||||||
|
|
||||||
SE_DISALLOW_COPY_AND_ASSIGN(Stream);
|
SE_DISALLOW_COPY_AND_ASSIGN(Stream);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user