Refactor code that enables deterministic operation of cuDNN

This commit is contained in:
Duncan Riach 2019-12-27 13:08:14 -08:00
parent 5341e3d299
commit 330e7ad14e
8 changed files with 58 additions and 65 deletions

View File

@ -669,7 +669,6 @@ cc_library(
"//tensorflow/core:stream_executor_no_cuda",
"//tensorflow/core/util/proto:proto_utils",
"//tensorflow/stream_executor:device_memory_allocator",
"//tensorflow/stream_executor/cuda:cuda_helpers",
"//tensorflow/stream_executor/gpu:redzone_allocator",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/strings",

View File

@ -35,8 +35,8 @@ limitations under the License.
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/logger.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/util/env_var.h"
#include "tensorflow/core/util/proto/proto_utils.h"
#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
#include "tensorflow/stream_executor/gpu/redzone_allocator.h"
namespace xla {
@ -310,6 +310,22 @@ StatusOr<AutotuneResult> GpuConvAlgorithmPicker::PickBestAlgorithm(
return result_or;
}
// Returns true iff deterministic cuDNN behavior was requested through the
// TF_DETERMINISTIC_OPS or TF_CUDNN_DETERMINISTIC environment variables.
// The environment is consulted once; the result is cached for the process.
bool RequireCuDNNDeterminism() {
  static bool require_cudnn_determinism = [] {
    // Reads a boolean environment variable, defaulting to false when unset.
    auto read_env_flag = [](const char* name) {
      bool flag_value = false;
      TF_CHECK_OK(
          tensorflow::ReadBoolFromEnvVar(name, /*default_val=*/false,
                                         &flag_value));
      return flag_value;
    };
    // Read both variables unconditionally so a malformed value in either
    // one is always diagnosed.
    const bool deterministic_ops = read_env_flag("TF_DETERMINISTIC_OPS");
    const bool cudnn_deterministic = read_env_flag("TF_CUDNN_DETERMINISTIC");
    return deterministic_ops || cudnn_deterministic;
  }();
  return require_cudnn_determinism;
}
StatusOr<tensorflow::AutotuneResult>
GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
const HloCustomCallInstruction* instr, se::DeviceMemoryAllocator* allocator,
@ -562,7 +578,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
}
auto selected_result = filtered_results.begin();
if (!se::cuda::RequireCuDNNDeterminism()) {
if (!RequireCuDNNDeterminism()) {
selected_result = absl::c_min_element(
filtered_results,
[](const AutotuneResult& lhs, const AutotuneResult& rhs) {

View File

@ -531,7 +531,6 @@ tf_cuda_library(
"//tensorflow/core:lib",
"//tensorflow/core:stream_executor",
"//tensorflow/core/util/proto:proto_utils",
"//tensorflow/stream_executor/cuda:cuda_helpers",
"//tensorflow/stream_executor/gpu:asm_compiler",
"//tensorflow/stream_executor/gpu:redzone_allocator",
"@com_google_absl//absl/algorithm:container",

View File

@ -24,8 +24,8 @@ limitations under the License.
#include "tensorflow/core/platform/logger.h"
#include "tensorflow/core/protobuf/autotuning.pb.h"
#include "tensorflow/core/protobuf/conv_autotuning.pb.h"
#include "tensorflow/core/util/env_var.h"
#include "tensorflow/core/util/proto/proto_utils.h"
#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
#include "tensorflow/stream_executor/gpu/asm_compiler.h"
#include "tensorflow/stream_executor/gpu/redzone_allocator.h"
@ -212,6 +212,22 @@ void LogFusedConvForwardAutotuneResults(
Logger::GetSingleton()->LogProto(log);
}
// Returns true iff deterministic functionality was requested through the
// TF_DETERMINISTIC_OPS or TF_CUDNN_DETERMINISTIC environment variables.
// The environment is consulted once; the result is cached for the process.
bool RequireDeterminism() {
  static bool require_determinism = [] {
    // Reads a boolean environment variable, defaulting to false when unset.
    auto read_env_flag = [](const char* name) {
      bool flag_value = false;
      TF_CHECK_OK(
          tensorflow::ReadBoolFromEnvVar(name, /*default_val=*/false,
                                         &flag_value));
      return flag_value;
    };
    // Read both variables unconditionally so a malformed value in either
    // one is always diagnosed.
    const bool deterministic_ops = read_env_flag("TF_DETERMINISTIC_OPS");
    const bool cudnn_deterministic = read_env_flag("TF_CUDNN_DETERMINISTIC");
    return deterministic_ops || cudnn_deterministic;
  }();
  return require_determinism;
}
Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
se::dnn::AlgorithmConfig* algo) {
std::vector<AutotuneResult> filtered_results;
@ -228,7 +244,7 @@ Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
auto selected_result = filtered_results.begin();
auto selected_result_no_scratch = filtered_results_no_scratch.begin();
if (!se::cuda::RequireCuDNNDeterminism()) {
if (!RequireDeterminism()) {
auto compare_run_times = [](const AutotuneResult& lhs,
const AutotuneResult& rhs) {
return proto_utils::FromDurationProto(lhs.run_time()) <

View File

@ -343,7 +343,6 @@ cc_library(
":cuda_platform_id",
":cuda_stream",
":cuda_timer",
":cuda_helpers",
":cudnn_version",
":cudnn_lib",
"@com_google_absl//absl/strings",
@ -481,8 +480,7 @@ cc_library(
# TODO(leary) we likely need to canonicalize/eliminate this.
cc_library(
name = "cuda_helpers",
srcs = if_cuda_is_configured(["cuda_helpers.cc"]),
hdrs = if_cuda_is_configured(["cuda_helpers.h"]),
textual_hdrs = if_cuda_is_configured(["cuda_helpers.h"]),
deps = if_cuda_is_configured([
"//tensorflow/stream_executor/gpu:gpu_helpers_header",
]),

View File

@ -27,7 +27,6 @@ limitations under the License.
#include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
#include "tensorflow/stream_executor/cuda/cuda_driver.h"
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
#include "tensorflow/stream_executor/cuda/cuda_platform_id.h"
#include "tensorflow/stream_executor/cuda/cuda_stream.h"
#include "tensorflow/stream_executor/cuda/cuda_timer.h"
@ -631,6 +630,22 @@ bool BatchnormSpatialPersistentEnabled() {
return is_enabled;
}
// Returns true iff deterministic cuDNN behavior was requested through the
// TF_DETERMINISTIC_OPS or TF_CUDNN_DETERMINISTIC environment variables.
// The environment is consulted once; the result is cached for the process.
bool RequireDeterminism() {
  static bool require_determinism = [] {
    // Reads a boolean environment variable, defaulting to false when unset.
    auto read_env_flag = [](const char* name) {
      bool flag_value = false;
      TF_CHECK_OK(
          tensorflow::ReadBoolFromEnvVar(name, /*default_val=*/false,
                                         &flag_value));
      return flag_value;
    };
    // Read both variables unconditionally so a malformed value in either
    // one is always diagnosed.
    const bool deterministic_ops = read_env_flag("TF_DETERMINISTIC_OPS");
    const bool cudnn_deterministic = read_env_flag("TF_CUDNN_DETERMINISTIC");
    return deterministic_ops || cudnn_deterministic;
  }();
  return require_determinism;
}
std::tuple<int, int> GetCcMajorMinor(Stream* stream) {
int cc_major, cc_minor;
stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
@ -729,10 +744,9 @@ class CudnnPoolingDescriptor {
std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
&CheckedNarrowing<int64, int>);
bool propagate_nans = pooling_descriptor.propagate_nans();
const auto cudnn_max_pooling_mode =
stream_executor::cuda::RequireCuDNNDeterminism()
? CUDNN_POOLING_MAX_DETERMINISTIC
: CUDNN_POOLING_MAX;
const auto cudnn_max_pooling_mode = RequireDeterminism()
? CUDNN_POOLING_MAX_DETERMINISTIC
: CUDNN_POOLING_MAX;
CHECK_CUDNN_OK(cudnnSetPoolingNdDescriptor(
handle_.get(),
(pooling_descriptor.mode() == dnn::PoolingMode::kMaximum
@ -3300,7 +3314,7 @@ bool CudnnSupport::GetConvolveBackwardDataAlgorithms(
if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED);
}
if (!stream_executor::cuda::RequireCuDNNDeterminism()) {
if (!RequireDeterminism()) {
algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0);
}
@ -3336,7 +3350,7 @@ bool CudnnSupport::GetConvolveBackwardFilterAlgorithms(
if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED);
}
if (!stream_executor::cuda::RequireCuDNNDeterminism()) {
if (!RequireDeterminism()) {
algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0);
algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3);
}

View File

@ -1,39 +0,0 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
#include "tensorflow/core/util/env_var.h"
namespace stream_executor {
namespace cuda {
// Returns true iff deterministic cuDNN behavior was requested through the
// TF_DETERMINISTIC_OPS or TF_CUDNN_DETERMINISTIC environment variables.
// The environment is consulted once; the result is cached for the process.
bool RequireCuDNNDeterminism() {
  static bool require_cudnn_determinism = [] {
    // Reads a boolean environment variable, defaulting to false when unset.
    auto read_env_flag = [](const char* name) {
      bool flag_value = false;
      TF_CHECK_OK(
          tensorflow::ReadBoolFromEnvVar(name, /*default_val=*/false,
                                         &flag_value));
      return flag_value;
    };
    // Read both variables unconditionally so a malformed value in either
    // one is always diagnosed.
    const bool deterministic_ops = read_env_flag("TF_DETERMINISTIC_OPS");
    const bool cudnn_deterministic = read_env_flag("TF_CUDNN_DETERMINISTIC");
    return deterministic_ops || cudnn_deterministic;
  }();
  return require_cudnn_determinism;
}
} // namespace cuda
} // namespace stream_executor

View File

@ -22,14 +22,4 @@ limitations under the License.
#include "tensorflow/stream_executor/gpu/gpu_helpers.h"
namespace stream_executor {
namespace cuda {
// A helper function to decide whether to enable deterministic cuDNN
// functionality.
bool RequireCuDNNDeterminism();
} // namespace cuda
} // namespace stream_executor
#endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_HELPERS_H_