Removed TENSOR_OP disable env vars.
* TF_DISABLE_CUBLAS_TENSOR_OP_MATH * TF_DISABLE_CUDNN_TENSOR_OP_MATH * TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH
This commit is contained in:
parent
f129485019
commit
32d63d0a3e
|
@ -101,18 +101,6 @@ static std::string ToString(cublasStatus_t status) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decide whether to enable TENSOR_OP_MATH
|
|
||||||
static bool TensorOpMathEnabled() {
|
|
||||||
static bool is_enabled = [] {
|
|
||||||
bool is_disabled;
|
|
||||||
TF_CHECK_OK(
|
|
||||||
tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUBLAS_TENSOR_OP_MATH",
|
|
||||||
/*default_val=*/false, &is_disabled));
|
|
||||||
return !is_disabled;
|
|
||||||
}();
|
|
||||||
return is_enabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
// cuBLAS has interfaces that permit pointers to be passed from either the host
|
// cuBLAS has interfaces that permit pointers to be passed from either the host
|
||||||
// memory space or the device memory space; however, you must instruct it as to
|
// memory space or the device memory space; however, you must instruct it as to
|
||||||
// which address space those pointers are in with cublasSetPointerMode.
|
// which address space those pointers are in with cublasSetPointerMode.
|
||||||
|
@ -1640,7 +1628,7 @@ bool CUDABlas::DoBlasGemm(
|
||||||
&cc_minor);
|
&cc_minor);
|
||||||
|
|
||||||
// GPUs < sm_70 don't support tensor ops.
|
// GPUs < sm_70 don't support tensor ops.
|
||||||
if (cc_major >= 7 && TensorOpMathEnabled()) {
|
if (cc_major >= 7) {
|
||||||
use_tensor_ops = true;
|
use_tensor_ops = true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1921,8 +1909,7 @@ static bool TensorOpsAvailable(int cc_major) {
|
||||||
// strictly correct. We can't simply enable it, though, as that would change
|
// strictly correct. We can't simply enable it, though, as that would change
|
||||||
// clients' behavior significantly: Using tensor ops on fp32 inputs cause them
|
// clients' behavior significantly: Using tensor ops on fp32 inputs cause them
|
||||||
// to be rounded to fp16.
|
// to be rounded to fp16.
|
||||||
if (cc_major >= 7 && TensorOpMathEnabled() &&
|
if (cc_major >= 7 && std::is_same<InType, Eigen::half>::value) {
|
||||||
std::is_same<InType, Eigen::half>::value) {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2270,7 +2257,7 @@ port::Status CUDABlas::DoBlasGemmBatchedInternal(
|
||||||
if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
|
if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
|
||||||
&cc_major, &cc_minor) &&
|
&cc_major, &cc_minor) &&
|
||||||
cc_major >= 5) {
|
cc_major >= 5) {
|
||||||
bool use_tensor_ops = TensorOpMathEnabled() && data_type == CUDA_R_16F;
|
bool use_tensor_ops = data_type == CUDA_R_16F;
|
||||||
cublasGemmAlgo_t algo =
|
cublasGemmAlgo_t algo =
|
||||||
(use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT);
|
(use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT);
|
||||||
cudaDataType_t compute_type =
|
cudaDataType_t compute_type =
|
||||||
|
@ -2425,7 +2412,7 @@ bool CUDABlas::DoBlasGemmStridedBatched(
|
||||||
if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
|
if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
|
||||||
&cc_major, &cc_minor)) {
|
&cc_major, &cc_minor)) {
|
||||||
// GPUs < sm_70 don't support tensor ops.
|
// GPUs < sm_70 don't support tensor ops.
|
||||||
if (cc_major >= 7 && TensorOpMathEnabled()) {
|
if (cc_major >= 7) {
|
||||||
use_tensor_ops = true;
|
use_tensor_ops = true;
|
||||||
}
|
}
|
||||||
#if CUDA_VERSION >= 9010
|
#if CUDA_VERSION >= 9010
|
||||||
|
|
|
@ -601,31 +601,6 @@ class CudnnFilterDescriptor {
|
||||||
SE_DISALLOW_COPY_AND_ASSIGN(CudnnFilterDescriptor);
|
SE_DISALLOW_COPY_AND_ASSIGN(CudnnFilterDescriptor);
|
||||||
};
|
};
|
||||||
|
|
||||||
// A helper function to decide whether to enable the TENSOR_OP_MATH math type
|
|
||||||
bool TensorOpMathEnabled() {
|
|
||||||
static bool is_enabled = [] {
|
|
||||||
bool is_disabled = false;
|
|
||||||
TF_CHECK_OK(
|
|
||||||
tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_TENSOR_OP_MATH",
|
|
||||||
/*default_val=*/false, &is_disabled));
|
|
||||||
return !is_disabled;
|
|
||||||
}();
|
|
||||||
return is_enabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
// A helper function to decide whether to enable the TENSOR_OP_MATH math type
|
|
||||||
// for RNNs.
|
|
||||||
bool RnnTensorOpMathEnabled() {
|
|
||||||
static bool is_enabled = [] {
|
|
||||||
bool is_disabled = false;
|
|
||||||
TF_CHECK_OK(
|
|
||||||
tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH",
|
|
||||||
/*default_val=*/false, &is_disabled));
|
|
||||||
return !is_disabled;
|
|
||||||
}();
|
|
||||||
return is_enabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
// A helper function to decide whether to use
|
// A helper function to decide whether to use
|
||||||
// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in
|
// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in
|
||||||
// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT
|
// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT
|
||||||
|
@ -749,9 +724,7 @@ class CudnnConvolutionDescriptor {
|
||||||
#if CUDNN_VERSION >= 7000
|
#if CUDNN_VERSION >= 7000
|
||||||
cudnnMathType_t math_type =
|
cudnnMathType_t math_type =
|
||||||
(use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH);
|
(use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH);
|
||||||
if (TensorOpMathEnabled()) {
|
|
||||||
CHECK_CUDNN_OK(cudnnSetConvolutionMathType(handle_.get(), math_type));
|
CHECK_CUDNN_OK(cudnnSetConvolutionMathType(handle_.get(), math_type));
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1155,7 +1128,6 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
|
||||||
// in profile mode, which is run with algorithms returned from
|
// in profile mode, which is run with algorithms returned from
|
||||||
// GetRnnAlgorithms() (which are non-default and explicitly set whether to
|
// GetRnnAlgorithms() (which are non-default and explicitly set whether to
|
||||||
// use tensor ops). CuDNN 7.2.1 fixed this issue
|
// use tensor ops). CuDNN 7.2.1 fixed this issue
|
||||||
if (RnnTensorOpMathEnabled()) {
|
|
||||||
cudnnMathType_t math_type;
|
cudnnMathType_t math_type;
|
||||||
if (algorithm_config.algorithm().has_value()) {
|
if (algorithm_config.algorithm().has_value()) {
|
||||||
math_type = algorithm_config.algorithm()->tensor_ops_enabled()
|
math_type = algorithm_config.algorithm()->tensor_ops_enabled()
|
||||||
|
@ -1169,7 +1141,6 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
|
||||||
#endif // CUDNN_VERSION >= 7201
|
#endif // CUDNN_VERSION >= 7201
|
||||||
}
|
}
|
||||||
CHECK_CUDNN_OK(cudnnSetRNNMatrixMathType(rnn_desc.get(), math_type));
|
CHECK_CUDNN_OK(cudnnSetRNNMatrixMathType(rnn_desc.get(), math_type));
|
||||||
}
|
|
||||||
#endif // CUDNN_VERSION >= 7000
|
#endif // CUDNN_VERSION >= 7000
|
||||||
|
|
||||||
return CudnnRnnDescriptor(cudnn, std::move(rnn_desc), std::move(rnn_plan),
|
return CudnnRnnDescriptor(cudnn, std::move(rnn_desc), std::move(rnn_plan),
|
||||||
|
@ -2686,7 +2657,7 @@ AllocateCudnnConvolutionBackwardFilterWorkspace(
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool TensorOpMathAvailable(int cc_major) {
|
static bool TensorOpMathAvailable(int cc_major) {
|
||||||
return cc_major >= 7 && CUDNN_VERSION >= 7000 && TensorOpMathEnabled();
|
return cc_major >= 7 && CUDNN_VERSION >= 7000;
|
||||||
}
|
}
|
||||||
|
|
||||||
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
|
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
|
||||||
|
@ -3480,9 +3451,7 @@ bool CudnnSupport::GetRnnAlgorithms(
|
||||||
for (auto i : algo_types) {
|
for (auto i : algo_types) {
|
||||||
out_algorithms->push_back({i, /*use_tensor_ops=*/false});
|
out_algorithms->push_back({i, /*use_tensor_ops=*/false});
|
||||||
#if CUDNN_VERSION >= 7100
|
#if CUDNN_VERSION >= 7100
|
||||||
if (RnnTensorOpMathEnabled()) {
|
|
||||||
out_algorithms->push_back({i, /*use_tensor_ops=*/true});
|
out_algorithms->push_back({i, /*use_tensor_ops=*/true});
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
Loading…
Reference in New Issue