StreamExecutor: add CUDA support for cudnnConvolutionBackwardBias

Change: 123233121
This commit is contained in:
A. Unique TensorFlower 2016-05-25 10:15:48 -08:00 committed by TensorFlower Gardener
parent 8515a76345
commit a9f3979264
5 changed files with 205 additions and 0 deletions
tensorflow/stream_executor

View File

@ -184,6 +184,7 @@ bool IsCudnnR2() {
__macro(cudnnSetStream) \
__macro(cudnnActivationForward) \
__macro(cudnnConvolutionForward) \
__macro(cudnnConvolutionBackwardBias) \
__macro(cudnnGetConvolutionForwardWorkspaceSize) \
__macro(cudnnTransformTensor) \
__macro(cudnnSetConvolutionNdDescriptor) \
@ -1493,6 +1494,72 @@ bool CudnnSupport::DoConvolveBackwardFilter(
algorithm, output_profile_result);
}
template <class T>
bool CudnnSupport::DoConvolveBackwardBiasImpl(
    Stream* stream, int cudnn_type,  // Actually cudnnDataType_t.
    const dnn::BatchDescriptor& input_descriptor,
    const DeviceMemory<T>& input_data,
    const dnn::BatchDescriptor& bias_descriptor,
    DeviceMemory<T>* backward_bias_data) {
  // Shared implementation behind the double/float/half overloads of
  // DoConvolveBackwardBias. Enqueues cudnnConvolutionBackwardBias on
  // `stream`, writing the bias backprop into *backward_bias_data.
  // Returns true on success, false if cuDNN fails to enqueue the op.
  mutex_lock lock{dnn_handle_mutex_};
  auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_),
                                        AsCUDAStreamValue(stream));
  if (status != CUDNN_STATUS_SUCCESS) {
    // Failing to bind the handle to the stream leaves the handle unusable;
    // treat it as unrecoverable, matching the other DoConvolve* paths.
    LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status);
  }

  ScopedTensorDescriptor input_nd{parent_, input_descriptor,
                                  static_cast<cudnnDataType_t>(cudnn_type)};
  ScopedTensorDescriptor bias_nd{parent_, bias_descriptor,
                                 static_cast<cudnnDataType_t>(cudnn_type)};

  // Alpha is the scaling factor for input.
  float alpha = 1.0;
  // Beta is the scaling factor for output.
  float beta = 0.0;

  status = dynload::cudnnConvolutionBackwardBias(
      parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(),
      input_data.opaque(), &beta, bias_nd.handle(),
      backward_bias_data->opaque());
  if (status != CUDNN_STATUS_SUCCESS) {
    // Was LOG(FATAL), which aborted the process and made the `return false`
    // below unreachable. An enqueue failure is recoverable by the caller, so
    // log an error and honor the bool return contract instead.
    LOG(ERROR) << "failed to enqueue backward convolution on stream: "
               << ToString(status);
    return false;
  }
  return true;
}
bool CudnnSupport::DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<double>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<double>* backward_bias_data) {
  // Double-precision entry point: forward to the shared implementation with
  // the matching cuDNN data-type tag.
  return DoConvolveBackwardBiasImpl<double>(
      stream, CUDNN_DATA_DOUBLE, input_descriptor, input_data, bias_descriptor,
      backward_bias_data);
}
bool CudnnSupport::DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<float>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<float>* backward_bias_data) {
  // Single-precision entry point: forward to the shared implementation with
  // the matching cuDNN data-type tag.
  return DoConvolveBackwardBiasImpl<float>(
      stream, CUDNN_DATA_FLOAT, input_descriptor, input_data, bias_descriptor,
      backward_bias_data);
}
bool CudnnSupport::DoConvolveBackwardBias(
    Stream* stream, const BatchDescriptor& input_descriptor,
    const DeviceMemory<Eigen::half>& input_data,
    const BatchDescriptor& bias_descriptor,
    DeviceMemory<Eigen::half>* backward_bias_data) {
  // Half-precision entry point: forward to the shared implementation with
  // the matching cuDNN data-type tag.
  return DoConvolveBackwardBiasImpl<Eigen::half>(
      stream, CUDNN_DATA_HALF, input_descriptor, input_data, bias_descriptor,
      backward_bias_data);
}
bool CudnnSupport::DoMatMul(Stream* stream,
const DeviceMemory<float>& input_data,
const DeviceMemory<float>& weights,

View File

@ -140,6 +140,24 @@ class CudnnSupport : public dnn::DnnSupport {
ScratchAllocator* scratch_allocator, dnn::AlgorithmType algorithm,
dnn::ProfileResult* output_profile_result) override;
// Computes the backprop of a convolution's bias on the given stream.
// One overload per supported element type (double/float/Eigen::half); all
// three forward to the type-parameterized DoConvolveBackwardBiasImpl.
// See dnn::DnnSupport::DoConvolveBackwardBias for the full contract.
bool DoConvolveBackwardBias(
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<double>& input_data,
const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<double>* backward_bias_data) override;
bool DoConvolveBackwardBias(Stream* stream,
const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<float>* backward_bias_data) override;
bool DoConvolveBackwardBias(
Stream* stream, const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<Eigen::half>& input_data,
const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<Eigen::half>* backward_bias_data) override;
bool DoMatMul(Stream* stream, const DeviceMemory<float>& input_data,
const DeviceMemory<float>& weights,
const dnn::BatchDescriptor& input_dimensions,
@ -311,6 +329,14 @@ class CudnnSupport : public dnn::DnnSupport {
dnn::AlgorithmType algorithm,
dnn::ProfileResult* output_profile_result);
// Type-parameterized implementation shared by the DoConvolveBackwardBias
// overloads above. `cudnn_type` is declared as int to keep cuDNN types out
// of this header; it actually carries a cudnnDataType_t value.
template <class T>
bool DoConvolveBackwardBiasImpl(Stream* stream,
int cudnn_type,  // Actually cudnnDataType_t.
const dnn::BatchDescriptor& input_descriptor,
const DeviceMemory<T>& input_data,
const dnn::BatchDescriptor& bias_descriptor,
DeviceMemory<T>* backward_bias_data);
SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport);
};

View File

@ -849,6 +849,43 @@ class DnnSupport {
ScratchAllocator* scratch_allocator, AlgorithmType algorithm,
ProfileResult* output_profile_result) = 0;
// Enqueues a backward convolution (for bias) operation onto the stream.
// Overloads are provided for float, double, and Eigen::half element types.
//
// Arguments:
//  stream: borrowed pointer to the stream that the 'convolve' operation
//  should be enqueued onto.
//  input_descriptor: dimensions of the input layer.
//  input_data: un-owned device memory region which contains the
//  convolution input.
//  bias_descriptor: dimensions of the bias tensor. Should be the same as the
//  input dimensions, but with the spatial dimensions set to 1.
//  backward_bias_data: un-owned device memory region in which to place the
//  backprop of the bias.
//
// The default implementations return false (operation unsupported); DNN
// backends override the element types they support.
virtual bool DoConvolveBackwardBias(Stream* stream,
const BatchDescriptor& input_descriptor,
const DeviceMemory<float>& input_data,
const BatchDescriptor& bias_descriptor,
DeviceMemory<float>* backward_bias_data) {
return false;
}
// Double-precision variant; same contract as above.
virtual bool DoConvolveBackwardBias(
Stream* stream, const BatchDescriptor& input_descriptor,
const DeviceMemory<double>& input_data,
const BatchDescriptor& bias_descriptor,
DeviceMemory<double>* backward_bias_data) {
return false;
}
// Half-precision variant; same contract as above.
virtual bool DoConvolveBackwardBias(
Stream* stream, const BatchDescriptor& input_descriptor,
const DeviceMemory<Eigen::half>& input_data,
const BatchDescriptor& bias_descriptor,
DeviceMemory<Eigen::half>* backward_bias_data) {
return false;
}
// Fully connects the "nodes" (float values) in input_data with
// shape input_dimensions to output_data with output_dimensions
// using provided weights. This is equivalent to computing a matrix

View File

@ -741,6 +741,57 @@ Stream &Stream::ThenConvolveBackwardFilter(
/*scratch_allocator=*/nullptr);
}
template <typename T>
Stream &Stream::ThenConvolveBackwardBiasImpl(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<T> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<T> *backward_bias_data) {
  // Shared driver for the typed ThenConvolveBackwardBias overloads: logs the
  // call, then dispatches to the platform's DnnSupport if one is available.
  VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor),
            PARAM(backward_bias_data));

  if (!ok()) {
    // Stream is already in an error state; enqueue nothing.
    return *this;
  }

  dnn::DnnSupport *dnn = parent_->AsDnn();
  if (dnn == nullptr) {
    // No DNN support on this platform: mark the stream errored and warn.
    SetError();
    LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor "
                    "without DNN support";
    return *this;
  }

  CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data,
                                         bias_descriptor, backward_bias_data));
  return *this;
}
Stream &Stream::ThenConvolveBackwardBias(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<double> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<double> *backward_bias_data) {
  // Double-precision entry point; delegates to the shared implementation.
  return ThenConvolveBackwardBiasImpl<double>(
      input_descriptor, input_data, bias_descriptor, backward_bias_data);
}
Stream &Stream::ThenConvolveBackwardBias(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<float> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<float> *backward_bias_data) {
  // Single-precision entry point; delegates to the shared implementation.
  return ThenConvolveBackwardBiasImpl<float>(
      input_descriptor, input_data, bias_descriptor, backward_bias_data);
}
Stream &Stream::ThenConvolveBackwardBias(
    const dnn::BatchDescriptor &input_descriptor,
    const DeviceMemory<Eigen::half> &input_data,
    const dnn::BatchDescriptor &bias_descriptor,
    DeviceMemory<Eigen::half> *backward_bias_data) {
  // Half-precision entry point; delegates to the shared implementation.
  return ThenConvolveBackwardBiasImpl<Eigen::half>(
      input_descriptor, input_data, bias_descriptor, backward_bias_data);
}
Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data,
const DeviceMemory<float> &weights,
const dnn::BatchDescriptor &input_dimensions,

View File

@ -371,6 +371,22 @@ class Stream {
ScratchAllocator *scratch_allocator, dnn::AlgorithmType algorithm,
dnn::ProfileResult *output_profile_result);
// Enqueues a backward convolution-for-bias operation onto this stream.
// One overload per supported element type (double/float/Eigen::half); all
// delegate to ThenConvolveBackwardBiasImpl. See
// dnn::DnnSupport::DoConvolveBackwardBias for argument semantics.
Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<double> &input_data,
const dnn::BatchDescriptor &bias_descriptor,
DeviceMemory<double> *backward_bias_data);
Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<float> &input_data,
const dnn::BatchDescriptor &bias_descriptor,
DeviceMemory<float> *backward_bias_data);
Stream &ThenConvolveBackwardBias(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<Eigen::half> &input_data,
const dnn::BatchDescriptor &bias_descriptor,
DeviceMemory<Eigen::half> *backward_bias_data);
Stream &ThenMatMul(const DeviceMemory<float> &input_data,
const DeviceMemory<float> &weights,
const dnn::BatchDescriptor &input_dimensions,
@ -1439,6 +1455,14 @@ class Stream {
// BlockHostUntilDone() is called.
internal::TemporaryMemoryManager temporary_memory_manager_;
// Implementation of ThenConvolveBackwardBias that is shared by all types.
// Sets the stream's error state if the platform provides no DNN support.
template <typename T>
Stream &ThenConvolveBackwardBiasImpl(
const dnn::BatchDescriptor &input_descriptor,
const DeviceMemory<T> &input_data,
const dnn::BatchDescriptor &bias_descriptor,
DeviceMemory<T> *backward_bias_data);
SE_DISALLOW_COPY_AND_ASSIGN(Stream);
};