Refactor code that enables deterministic operation of cuDNN
parent 5341e3d299
commit 330e7ad14e
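The change removes the shared stream_executor::cuda::RequireCuDNNDeterminism() helper (and the cuda_helpers dependency that came with it) and instead gives each client file its own local copy that reads the same two environment variables. A hedged usage sketch, not part of the commit, of how a hypothetical driver process would turn the behaviour on:

    // Hypothetical example: either variable, set before cuDNN is first used,
    // makes the helpers added in this diff return true. The hunks below show
    // what that implies: autotuning keeps the first filtered result instead
    // of the fastest one, CUDNN_CONVOLUTION_BWD_{DATA,FILTER}_ALGO_0 (and _3)
    // are no longer offered, and max pooling uses
    // CUDNN_POOLING_MAX_DETERMINISTIC.
    #include <cstdlib>

    int main() {
      setenv("TF_DETERMINISTIC_OPS", "1", /*overwrite=*/1);
      // setenv("TF_CUDNN_DETERMINISTIC", "1", 1);  // equivalent alternative
      // ... build and run the TensorFlow / XLA:GPU program as usual ...
      return 0;
    }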
@@ -669,7 +669,6 @@ cc_library(
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core/util/proto:proto_utils",
         "//tensorflow/stream_executor:device_memory_allocator",
-        "//tensorflow/stream_executor/cuda:cuda_helpers",
         "//tensorflow/stream_executor/gpu:redzone_allocator",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/strings",
@@ -35,8 +35,8 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/proto/proto_utils.h"
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
 #include "tensorflow/stream_executor/gpu/redzone_allocator.h"
 
 namespace xla {
@@ -310,6 +310,22 @@ StatusOr<AutotuneResult> GpuConvAlgorithmPicker::PickBestAlgorithm(
   return result_or;
 }
 
+// A helper function to decide whether to enable deterministic functionality.
+bool RequireCuDNNDeterminism() {
+  static bool require_cudnn_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    bool cudnn_deterministic = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
+                                               /*default_val=*/false,
+                                               &cudnn_deterministic));
+    return deterministic_ops || cudnn_deterministic;
+  }();
+  return require_cudnn_determinism;
+}
+
 StatusOr<tensorflow::AutotuneResult>
 GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
     const HloCustomCallInstruction* instr, se::DeviceMemoryAllocator* allocator,
@@ -562,7 +578,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
   }
 
   auto selected_result = filtered_results.begin();
-  if (!se::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireCuDNNDeterminism()) {
     selected_result = absl::c_min_element(
         filtered_results,
         [](const AutotuneResult& lhs, const AutotuneResult& rhs) {
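For context, a condensed sketch of the selection logic this guard controls, using simplified stand-in types rather than the real tensorflow::AutotuneResult proto: when determinism is required the first filtered result is kept as-is, otherwise the fastest result wins.

    #include <algorithm>
    #include <vector>

    struct Result {        // stand-in for AutotuneResult
      int algo_id;
      double run_time_ms;
    };

    // Assumes `filtered` is non-empty, as in the surrounding code.
    int SelectAlgorithm(const std::vector<Result>& filtered,
                        bool require_determinism) {
      auto selected = filtered.begin();
      if (!require_determinism) {
        selected = std::min_element(
            filtered.begin(), filtered.end(),
            [](const Result& lhs, const Result& rhs) {
              return lhs.run_time_ms < rhs.run_time_ms;
            });
      }
      return selected->algo_id;
    }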
@@ -531,7 +531,6 @@ tf_cuda_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor",
         "//tensorflow/core/util/proto:proto_utils",
-        "//tensorflow/stream_executor/cuda:cuda_helpers",
         "//tensorflow/stream_executor/gpu:asm_compiler",
         "//tensorflow/stream_executor/gpu:redzone_allocator",
         "@com_google_absl//absl/algorithm:container",
@@ -24,8 +24,8 @@ limitations under the License.
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/protobuf/autotuning.pb.h"
 #include "tensorflow/core/protobuf/conv_autotuning.pb.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/proto/proto_utils.h"
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
 #include "tensorflow/stream_executor/gpu/asm_compiler.h"
 #include "tensorflow/stream_executor/gpu/redzone_allocator.h"
 
@@ -212,6 +212,22 @@ void LogFusedConvForwardAutotuneResults(
   Logger::GetSingleton()->LogProto(log);
 }
 
+// A helper function to decide whether to enable deterministic functionality.
+bool RequireDeterminism() {
+  static bool require_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    bool cudnn_deterministic = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
+                                               /*default_val=*/false,
+                                               &cudnn_deterministic));
+    return deterministic_ops || cudnn_deterministic;
+  }();
+  return require_determinism;
+}
+
 Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
                               se::dnn::AlgorithmConfig* algo) {
   std::vector<AutotuneResult> filtered_results;
@@ -228,7 +244,7 @@ Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
 
   auto selected_result = filtered_results.begin();
   auto selected_result_no_scratch = filtered_results_no_scratch.begin();
-  if (!se::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireDeterminism()) {
     auto compare_run_times = [](const AutotuneResult& lhs,
                                 const AutotuneResult& rhs) {
       return proto_utils::FromDurationProto(lhs.run_time()) <
@@ -343,7 +343,6 @@ cc_library(
         ":cuda_platform_id",
         ":cuda_stream",
         ":cuda_timer",
-        ":cuda_helpers",
         ":cudnn_version",
         ":cudnn_lib",
         "@com_google_absl//absl/strings",
@@ -481,8 +480,7 @@ cc_library(
 # TODO(leary) we likely need to canonicalize/eliminate this.
 cc_library(
     name = "cuda_helpers",
-    srcs = if_cuda_is_configured(["cuda_helpers.cc"]),
-    hdrs = if_cuda_is_configured(["cuda_helpers.h"]),
+    textual_hdrs = if_cuda_is_configured(["cuda_helpers.h"]),
     deps = if_cuda_is_configured([
         "//tensorflow/stream_executor/gpu:gpu_helpers_header",
     ]),
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
 #include "tensorflow/stream_executor/cuda/cuda_driver.h"
 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
 #include "tensorflow/stream_executor/cuda/cuda_platform_id.h"
 #include "tensorflow/stream_executor/cuda/cuda_stream.h"
 #include "tensorflow/stream_executor/cuda/cuda_timer.h"
@@ -631,6 +630,22 @@ bool BatchnormSpatialPersistentEnabled() {
   return is_enabled;
 }
 
+// A helper function to decide whether to enable deterministic functionality.
+bool RequireDeterminism() {
+  static bool require_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    bool cudnn_deterministic = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
+                                               /*default_val=*/false,
+                                               &cudnn_deterministic));
+    return deterministic_ops || cudnn_deterministic;
+  }();
+  return require_determinism;
+}
+
 std::tuple<int, int> GetCcMajorMinor(Stream* stream) {
   int cc_major, cc_minor;
   stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
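This hunk adds the third copy of the same helper. The underlying idiom is a function-local static initialized by an immediately-invoked lambda, so the two environment variables are read once per process and the cached answer is reused on every later call. A minimal self-contained sketch of the idiom with hypothetical names, using plain std::getenv in place of tensorflow::ReadBoolFromEnvVar:

    #include <cstdlib>
    #include <cstring>

    // Treats "1" or "true" as enabled; unset or anything else as disabled.
    bool EnvFlagEnabled(const char* name) {
      const char* value = std::getenv(name);
      return value != nullptr &&
             (std::strcmp(value, "1") == 0 || std::strcmp(value, "true") == 0);
    }

    bool DeterminismRequested() {
      static bool requested = [] {  // initialized once, thread-safe since C++11
        return EnvFlagEnabled("TF_DETERMINISTIC_OPS") ||
               EnvFlagEnabled("TF_CUDNN_DETERMINISTIC");
      }();
      return requested;
    }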
@@ -729,10 +744,9 @@ class CudnnPoolingDescriptor {
     std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
                    &CheckedNarrowing<int64, int>);
     bool propagate_nans = pooling_descriptor.propagate_nans();
-    const auto cudnn_max_pooling_mode =
-        stream_executor::cuda::RequireCuDNNDeterminism()
-            ? CUDNN_POOLING_MAX_DETERMINISTIC
-            : CUDNN_POOLING_MAX;
+    const auto cudnn_max_pooling_mode = RequireDeterminism()
+                                            ? CUDNN_POOLING_MAX_DETERMINISTIC
+                                            : CUDNN_POOLING_MAX;
     CHECK_CUDNN_OK(cudnnSetPoolingNdDescriptor(
         handle_.get(),
         (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum
@@ -3300,7 +3314,7 @@ bool CudnnSupport::GetConvolveBackwardDataAlgorithms(
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED);
   }
-  if (!stream_executor::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireDeterminism()) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0);
   }
 
@@ -3336,7 +3350,7 @@ bool CudnnSupport::GetConvolveBackwardFilterAlgorithms(
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED);
   }
-  if (!stream_executor::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireDeterminism()) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0);
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3);
   }
@@ -1,39 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
-
-#include "tensorflow/core/util/env_var.h"
-
-namespace stream_executor {
-namespace cuda {
-
-bool RequireCuDNNDeterminism() {
-  static bool require_cudnn_determinism = [] {
-    bool deterministic_ops = false;
-    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
-                                               /*default_val=*/false,
-                                               &deterministic_ops));
-    bool cudnn_deterministic = false;
-    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
-                                               /*default_val=*/false,
-                                               &cudnn_deterministic));
-    return deterministic_ops || cudnn_deterministic;
-  }();
-  return require_cudnn_determinism;
-}
-
-}  // namespace cuda
-}  // namespace stream_executor
@@ -22,14 +22,4 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/gpu/gpu_helpers.h"
 
-namespace stream_executor {
-namespace cuda {
-
-// A helper function to decide whether to enable deterministic cuDNN
-// functionality.
-bool RequireCuDNNDeterminism();
-
-}  // namespace cuda
-}  // namespace stream_executor
-
 #endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_HELPERS_H_