From 891416ac22515cccd0b1b9dff722f931bed83631 Mon Sep 17 00:00:00 2001
From: Duncan Riach
Date: Wed, 15 Jan 2020 18:19:26 -0800
Subject: [PATCH] Add comments about plan to migrate from environment
 variables to tf.config plus plumbing
---
 .../service/gpu/gpu_conv_algorithm_picker.cc | 19 ++++++++++---
 tensorflow/core/kernels/gpu_utils.cc         | 23 ++++++++++++----
 tensorflow/stream_executor/cuda/cuda_dnn.cc  | 27 ++++++++++++++-----
 3 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
index a7f9877ff3e..95a5827bf44 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
@@ -310,8 +310,21 @@ StatusOr<AutotuneResult> GpuConvAlgorithmPicker::PickBestAlgorithm(
   return result_or;
 }
-// A helper function to decide whether to enable deterministic functionality.
-bool RequireCuDNNDeterminism() {
+// The following function allows deterministic ops to be implemented relatively
+// quickly using environment variables. It is intended to be temporary. The
+// longer-term intention is to enable deterministic ops via tf.config and
+// appropriate plumbing. See the discussion on PR 34951 for more information:
+// https://github.com/tensorflow/tensorflow/pull/34951#discussion_r355682316
+// This function and associated comment are replicated in the following three
+// places:
+// 1. tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+// 2. tensorflow/core/kernels/gpu_utils.cc
+// 3. tensorflow/stream_executor/cuda/cuda_dnn.cc
+// When implementing the plumbing, you should also search for the use of
+// TF_DETERMINISTIC_OPS on its own.
+// TODO(duncanriach): move to an API that uses tf.config and implement the first
+// phase of plumbing.
+bool RequireCudnnDeterminism() {
   static bool require_cudnn_determinism = [] {
     bool deterministic_ops = false;
     TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
                                                /*default_val=*/false,
@@ -578,7 +591,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
   }
 
   auto selected_result = filtered_results.begin();
-  if (!RequireCuDNNDeterminism()) {
+  if (!RequireCudnnDeterminism()) {
     selected_result = absl::c_min_element(
         filtered_results,
         [](const AutotuneResult& lhs, const AutotuneResult& rhs) {
diff --git a/tensorflow/core/kernels/gpu_utils.cc b/tensorflow/core/kernels/gpu_utils.cc
index 5fbee449588..d62e6498376 100644
--- a/tensorflow/core/kernels/gpu_utils.cc
+++ b/tensorflow/core/kernels/gpu_utils.cc
@@ -212,9 +212,22 @@ void LogFusedConvForwardAutotuneResults(
   Logger::GetSingleton()->LogProto(log);
 }
 
-// A helper function to decide whether to enable deterministic functionality.
-bool RequireDeterminism() {
-  static bool require_determinism = [] {
+// The following function allows deterministic ops to be implemented relatively
+// quickly using environment variables. It is intended to be temporary. The
+// longer-term intention is to enable deterministic ops via tf.config and
+// appropriate plumbing. See the discussion on PR 34951 for more information:
+// https://github.com/tensorflow/tensorflow/pull/34951#discussion_r355682316
+// This function and associated comment are replicated in the following three
+// places:
+// 1. tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+// 2. tensorflow/core/kernels/gpu_utils.cc
+// 3. tensorflow/stream_executor/cuda/cuda_dnn.cc
+// When implementing the plumbing, you should also search for the use of
+// TF_DETERMINISTIC_OPS on its own.
+// TODO(duncanriach): move to an API that uses tf.config and implement the first
+// phase of plumbing.
+bool RequireCudnnDeterminism() {
+  static bool require_cudnn_determinism = [] {
     bool deterministic_ops = false;
     TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
                                                /*default_val=*/false,
@@ -225,7 +238,7 @@ bool RequireDeterminism() {
                                                &cudnn_deterministic));
     return deterministic_ops || cudnn_deterministic;
   }();
-  return require_determinism;
+  return require_cudnn_determinism;
 }
 
 Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
@@ -244,7 +257,7 @@ Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
 
   auto selected_result = filtered_results.begin();
   auto selected_result_no_scratch = filtered_results_no_scratch.begin();
-  if (!RequireDeterminism()) {
+  if (!RequireCudnnDeterminism()) {
     auto compare_run_times = [](const AutotuneResult& lhs,
                                 const AutotuneResult& rhs) {
       return proto_utils::FromDurationProto(lhs.run_time()) <
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 968ad660fd7..2c640d4ff0b 100755
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -630,9 +630,22 @@ bool BatchnormSpatialPersistentEnabled() {
   return is_enabled;
 }
 
-// A helper function to decide whether to enable deterministic functionality.
-bool RequireDeterminism() {
-  static bool require_determinism = [] {
+// The following function allows deterministic ops to be implemented relatively
+// quickly using environment variables. It is intended to be temporary. The
+// longer-term intention is to enable deterministic ops via tf.config and
+// appropriate plumbing. See the discussion on PR 34951 for more information:
+// https://github.com/tensorflow/tensorflow/pull/34951#discussion_r355682316
+// This function and associated comment are replicated in the following three
+// places:
+// 1. tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+// 2. tensorflow/core/kernels/gpu_utils.cc
+// 3. tensorflow/stream_executor/cuda/cuda_dnn.cc
+// When implementing the plumbing, you should also search for the use of
+// TF_DETERMINISTIC_OPS on its own.
+// TODO(duncanriach): move to an API that uses tf.config and implement the first
+// phase of plumbing.
+bool RequireCudnnDeterminism() {
+  static bool require_cudnn_determinism = [] {
     bool deterministic_ops = false;
     TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
                                                /*default_val=*/false,
@@ -643,7 +656,7 @@ bool RequireDeterminism() {
                                                &cudnn_deterministic));
     return deterministic_ops || cudnn_deterministic;
   }();
-  return require_determinism;
+  return require_cudnn_determinism;
 }
 
 std::tuple<int, int> GetCcMajorMinor(Stream* stream) {
@@ -744,7 +757,7 @@ class CudnnPoolingDescriptor {
     std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
                    &CheckedNarrowing<int64, int>);
     bool propagate_nans = pooling_descriptor.propagate_nans();
-    const auto cudnn_max_pooling_mode = RequireDeterminism()
+    const auto cudnn_max_pooling_mode = RequireCudnnDeterminism()
                                             ? CUDNN_POOLING_MAX_DETERMINISTIC
                                             : CUDNN_POOLING_MAX;
     CHECK_CUDNN_OK(cudnnSetPoolingNdDescriptor(
@@ -3314,7 +3327,7 @@ bool CudnnSupport::GetConvolveBackwardDataAlgorithms(
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED);
   }
-  if (!RequireDeterminism()) {
+  if (!RequireCudnnDeterminism()) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0);
   }
 
@@ -3350,7 +3363,7 @@ bool CudnnSupport::GetConvolveBackwardFilterAlgorithms(
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED);
   }
-  if (!RequireDeterminism()) {
+  if (!RequireCudnnDeterminism()) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0);
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3);
   }
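
For reference, the environment-variable gating that all three copies of
RequireCudnnDeterminism() implement can be sketched in isolation. The
following is a minimal, self-contained approximation, not TensorFlow source:
ParseBoolFromEnv and the main() driver are hypothetical stand-ins, and the
real tensorflow::ReadBoolFromEnvVar accepts more spellings of true/false and
reports parse failures through a Status rather than silently defaulting.

// determinism_check_sketch.cc -- illustrative only, not TensorFlow source.
#include <cstdlib>
#include <cstring>
#include <iostream>

namespace {

// Hypothetical stand-in for tensorflow::ReadBoolFromEnvVar: treats "1" or
// "true" as true and anything else (including an unset variable) as the
// default value.
bool ParseBoolFromEnv(const char* name, bool default_val) {
  const char* value = std::getenv(name);
  if (value == nullptr) return default_val;
  return std::strcmp(value, "1") == 0 || std::strcmp(value, "true") == 0;
}

// Same shape as RequireCudnnDeterminism in the patch: the lambda runs exactly
// once under C++11 thread-safe static initialization, so the environment is
// consulted only on first use and the result is cached for the process.
bool RequireCudnnDeterminism() {
  static bool require_cudnn_determinism = [] {
    bool deterministic_ops =
        ParseBoolFromEnv("TF_DETERMINISTIC_OPS", /*default_val=*/false);
    bool cudnn_deterministic =
        ParseBoolFromEnv("TF_CUDNN_DETERMINISTIC", /*default_val=*/false);
    return deterministic_ops || cudnn_deterministic;
  }();
  return require_cudnn_determinism;
}

}  // namespace

int main() {
  // Run as, e.g.: TF_DETERMINISTIC_OPS=1 ./determinism_check_sketch
  std::cout << "determinism required: " << RequireCudnnDeterminism() << "\n";
  return 0;
}

With either TF_DETERMINISTIC_OPS=1 or TF_CUDNN_DETERMINISTIC=1 set in the
process environment, the function returns true on every call, which is what
lets the callers in this patch keep the first eligible autotuning result
instead of the fastest one, switch max pooling to
CUDNN_POOLING_MAX_DETERMINISTIC, and exclude the nondeterministic cuDNN
backward-data and backward-filter algorithms.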