Fix default RNN ops never using tensor cores

2019-05-28 17:23:59 -07:00 · 2019-05-28 17:23:59 -07:00 · 660925846c
commit 660925846c
parent 53fd64291f
1 changed files with 15 additions and 7 deletions
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1087,15 +1087,23 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
    // We can only reasonably expect the user to handle the subsequent failure
    // in profile mode, which is run with algorithms returned from
    // GetRnnAlgorithms() (which are non-default and explicitly set whether to
-    // use tensor ops).
+    // use tensor ops). CuDNN 7.2.1 fixed this issue
-    if (RnnTensorOpMathEnabled() && algorithm_config.algorithm().has_value()) {
+    if (RnnTensorOpMathEnabled()) {
-      cudnnMathType_t math_type =
+      cudnnMathType_t math_type;
-          algorithm_config.algorithm()->tensor_ops_enabled()
+      if (algorithm_config.algorithm().has_value()) {
-              ? CUDNN_TENSOR_OP_MATH
+        math_type = algorithm_config.algorithm()->tensor_ops_enabled()
-              : CUDNN_DEFAULT_MATH;
+                        ? CUDNN_TENSOR_OP_MATH
                        : CUDNN_DEFAULT_MATH;
      } else {
 #if CUDNN_VERSION >= 7201
        math_type = CUDNN_TENSOR_OP_MATH;
 #else
        math_type = CUDNN_DEFAULT_MATH;
 #endif // CUDNN_VERSION >= 7201
      }
      CHECK_CUDNN_OK(cudnnSetRNNMatrixMathType(rnn_desc.get(), math_type));
    }
-#endif
+#endif // CUDNN_VERSION >= 7000
    return CudnnRnnDescriptor(cudnn, std::move(rnn_desc), std::move(rnn_plan),
                              num_layers, hidden_size, input_size, batch_size,