StreamExecutor: add CUDA support for cudnnConvolutionBackwardBias
Change: 123233121
This commit is contained in:
parent
8515a76345
commit
a9f3979264
tensorflow/stream_executor
@ -184,6 +184,7 @@ bool IsCudnnR2() {
|
||||
__macro(cudnnSetStream) \
|
||||
__macro(cudnnActivationForward) \
|
||||
__macro(cudnnConvolutionForward) \
|
||||
__macro(cudnnConvolutionBackwardBias) \
|
||||
__macro(cudnnGetConvolutionForwardWorkspaceSize) \
|
||||
__macro(cudnnTransformTensor) \
|
||||
__macro(cudnnSetConvolutionNdDescriptor) \
|
||||
@ -1493,6 +1494,72 @@ bool CudnnSupport::DoConvolveBackwardFilter(
|
||||
algorithm, output_profile_result);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
bool CudnnSupport::DoConvolveBackwardBiasImpl(
|
||||
Stream* stream, int cudnn_type, // Actually cudnnDataType_t.
|
||||
const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<T>& input_data,
|
||||
const dnn::BatchDescriptor& bias_descriptor,
|
||||
DeviceMemory<T>* backward_bias_data) {
|
||||
mutex_lock lock{dnn_handle_mutex_};
|
||||
auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_),
|
||||
AsCUDAStreamValue(stream));
|
||||
if (status != CUDNN_STATUS_SUCCESS) {
|
||||
LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status);
|
||||
}
|
||||
|
||||
ScopedTensorDescriptor input_nd{parent_, input_descriptor,
|
||||
static_cast<cudnnDataType_t>(cudnn_type)};
|
||||
ScopedTensorDescriptor bias_nd{parent_, bias_descriptor,
|
||||
static_cast<cudnnDataType_t>(cudnn_type)};
|
||||
|
||||
// Alpha is the scaling factor for input.
|
||||
float alpha = 1.0;
|
||||
// Beta is the scaling factor for output.
|
||||
float beta = 0.0;
|
||||
|
||||
status = dynload::cudnnConvolutionBackwardBias(
|
||||
parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(),
|
||||
input_data.opaque(), &beta, bias_nd.handle(),
|
||||
backward_bias_data->opaque());
|
||||
if (status != CUDNN_STATUS_SUCCESS) {
|
||||
LOG(FATAL) << "failed to enqueue backward convolution on stream: "
|
||||
<< ToString(status);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Double-precision entry point: forwards to the shared implementation with the
// matching cuDNN data-type tag.
bool CudnnSupport::DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<double>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<double>* backward_bias_data) {
  return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_DOUBLE,
                                    input_descriptor, input_data,
                                    bias_descriptor, backward_bias_data);
}
|
||||
|
||||
// Single-precision entry point: forwards to the shared implementation with the
// matching cuDNN data-type tag.
bool CudnnSupport::DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<float>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<float>* backward_bias_data) {
  return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_FLOAT,
                                    input_descriptor, input_data,
                                    bias_descriptor, backward_bias_data);
}
|
||||
|
||||
// Half-precision entry point: forwards to the shared implementation with the
// matching cuDNN data-type tag.
bool CudnnSupport::DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<Eigen::half>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<Eigen::half>* backward_bias_data) {
  return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_HALF,
                                    input_descriptor, input_data,
                                    bias_descriptor, backward_bias_data);
}
|
||||
|
||||
bool CudnnSupport::DoMatMul(Stream* stream,
|
||||
const DeviceMemory<float>& input_data,
|
||||
const DeviceMemory<float>& weights,
|
||||
|
@ -140,6 +140,24 @@ class CudnnSupport : public dnn::DnnSupport {
|
||||
ScratchAllocator* scratch_allocator, dnn::AlgorithmType algorithm,
|
||||
dnn::ProfileResult* output_profile_result) override;
|
||||
|
||||
bool DoConvolveBackwardBias(
|
||||
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<double>& input_data,
|
||||
const dnn::BatchDescriptor& bias_descriptor,
|
||||
DeviceMemory<double>* backward_bias_data) override;
|
||||
|
||||
bool DoConvolveBackwardBias(Stream* stream,
|
||||
const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<float>& input_data,
|
||||
const dnn::BatchDescriptor& bias_descriptor,
|
||||
DeviceMemory<float>* backward_bias_data) override;
|
||||
|
||||
bool DoConvolveBackwardBias(
|
||||
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<Eigen::half>& input_data,
|
||||
const dnn::BatchDescriptor& bias_descriptor,
|
||||
DeviceMemory<Eigen::half>* backward_bias_data) override;
|
||||
|
||||
bool DoMatMul(Stream* stream, const DeviceMemory<float>& input_data,
|
||||
const DeviceMemory<float>& weights,
|
||||
const dnn::BatchDescriptor& input_dimensions,
|
||||
@ -311,6 +329,14 @@ class CudnnSupport : public dnn::DnnSupport {
|
||||
dnn::AlgorithmType algorithm,
|
||||
dnn::ProfileResult* output_profile_result);
|
||||
|
||||
template <class T>
|
||||
bool DoConvolveBackwardBiasImpl(Stream* stream,
|
||||
int cudnn_type, // Actually cudnnDataType_t.
|
||||
const dnn::BatchDescriptor& input_descriptor,
|
||||
const DeviceMemory<T>& input_data,
|
||||
const dnn::BatchDescriptor& bias_descriptor,
|
||||
DeviceMemory<T>* backward_bias_data);
|
||||
|
||||
SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport);
|
||||
};
|
||||
|
||||
|
@ -849,6 +849,43 @@ class DnnSupport {
|
||||
ScratchAllocator* scratch_allocator, AlgorithmType algorithm,
|
||||
ProfileResult* output_profile_result) = 0;
|
||||
|
||||
// Enqueues a single-precision backward convolution (for bias) operation onto
// the stream.
//
// Arguments:
//  stream: borrowed pointer to the stream that the 'convolve' operation
//    should be enqueued onto.
//  input_descriptor: dimensions of the input layer.
//  input_data: un-owned device memory region which contains the
//    convolution input.
//  bias_descriptor: dimensions of the bias tensor. Should be the same as the
//    input dimensions, but with the spatial dimensions set to 1.
//  backward_bias_data: un-owned device memory region in which to place the
//    backprop of the bias.
virtual bool DoConvolveBackwardBias(Stream* stream,
                                    const BatchDescriptor& input_descriptor,
                                    const DeviceMemory<float>& input_data,
                                    const BatchDescriptor& bias_descriptor,
                                    DeviceMemory<float>* backward_bias_data) {
  // Default: the backend does not support backward bias; implementations
  // that do (e.g. cuDNN) override this.
  return false;
}
|
||||
|
||||
// Double-precision overload of DoConvolveBackwardBias; see the float
// overload above for argument documentation. Returns false by default
// (operation unsupported) unless a backend overrides it.
virtual bool DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<double>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<double>* backward_bias_data) {
  return false;
}
|
||||
|
||||
// Half-precision (Eigen::half) overload of DoConvolveBackwardBias; see the
// float overload above for argument documentation. Returns false by default
// (operation unsupported) unless a backend overrides it.
virtual bool DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<Eigen::half>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<Eigen::half>* backward_bias_data) {
  return false;
}
|
||||
|
||||
// Fully connects the "nodes" (float values) in input_data with
|
||||
// shape input_dimensions to output_data with output_dimensions
|
||||
// using provided weights. This is equivalent to computing a matrix
|
||||
|
@ -741,6 +741,57 @@ Stream &Stream::ThenConvolveBackwardFilter(
|
||||
/*scratch_allocator=*/nullptr);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Stream &Stream::ThenConvolveBackwardBiasImpl(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<T> &input_data,
|
||||
const dnn::BatchDescriptor &bias_descriptor,
|
||||
DeviceMemory<T> *backward_bias_data) {
|
||||
VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor),
|
||||
PARAM(backward_bias_data));
|
||||
|
||||
if (ok()) {
|
||||
if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
|
||||
CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data,
|
||||
bias_descriptor,
|
||||
backward_bias_data));
|
||||
} else {
|
||||
SetError();
|
||||
LOG(WARNING)
|
||||
<< "attempting to perform DNN operation using StreamExecutor "
|
||||
"without DNN support";
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Double-precision overload: delegates to the shared template implementation.
Stream &Stream::ThenConvolveBackwardBias(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<double> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<double> *backward_bias_data) {
  return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
                                      bias_descriptor, backward_bias_data);
}
|
||||
|
||||
// Single-precision overload: delegates to the shared template implementation.
Stream &Stream::ThenConvolveBackwardBias(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<float> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<float> *backward_bias_data) {
  return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
                                      bias_descriptor, backward_bias_data);
}
|
||||
|
||||
// Half-precision (Eigen::half) overload: delegates to the shared template
// implementation.
Stream &Stream::ThenConvolveBackwardBias(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<Eigen::half> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<Eigen::half> *backward_bias_data) {
  return ThenConvolveBackwardBiasImpl(input_descriptor, input_data,
                                      bias_descriptor, backward_bias_data);
}
|
||||
|
||||
Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
|
||||
const DeviceMemory<float> &weights,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
|
@ -371,6 +371,22 @@ class Stream {
|
||||
ScratchAllocator *scratch_allocator, dnn::AlgorithmType algorithm,
|
||||
dnn::ProfileResult *output_profile_result);
|
||||
|
||||
Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<double> &input_data,
|
||||
const dnn::BatchDescriptor &bias_descriptor,
|
||||
DeviceMemory<double> *backward_bias_data);
|
||||
|
||||
Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<float> &input_data,
|
||||
const dnn::BatchDescriptor &bias_descriptor,
|
||||
DeviceMemory<float> *backward_bias_data);
|
||||
|
||||
Stream &ThenConvolveBackwardBias(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<Eigen::half> &input_data,
|
||||
const dnn::BatchDescriptor &bias_descriptor,
|
||||
DeviceMemory<Eigen::half> *backward_bias_data);
|
||||
|
||||
Stream &ThenMatMul(const DeviceMemory<float> &input_data,
|
||||
const DeviceMemory<float> &weights,
|
||||
const dnn::BatchDescriptor &input_dimensions,
|
||||
@ -1439,6 +1455,14 @@ class Stream {
|
||||
// BlockHostUntilDone() is called.
|
||||
internal::TemporaryMemoryManager temporary_memory_manager_;
|
||||
|
||||
// Implementation of ThenConvolveBackwardBias that is shared by all types.
|
||||
template <typename T>
|
||||
Stream &ThenConvolveBackwardBiasImpl(
|
||||
const dnn::BatchDescriptor &input_descriptor,
|
||||
const DeviceMemory<T> &input_data,
|
||||
const dnn::BatchDescriptor &bias_descriptor,
|
||||
DeviceMemory<T> *backward_bias_data);
|
||||
|
||||
SE_DISALLOW_COPY_AND_ASSIGN(Stream);
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user