Refactor code that enables deterministic operation of cuDNN
parent 5341e3d299
commit 330e7ad14e
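The change removes the shared stream_executor::cuda::RequireCuDNNDeterminism() helper (and the cuda_helpers dependency that came with it) and instead gives each client file its own local copy that reads the same two environment variables. A hedged usage sketch, not part of the commit, of how a hypothetical driver process would turn the behaviour on:

    // Hypothetical example: either variable, set before cuDNN is first used,
    // makes the helpers added in this diff return true. The hunks below show
    // what that implies: autotuning keeps the first filtered result instead
    // of the fastest one, CUDNN_CONVOLUTION_BWD_{DATA,FILTER}_ALGO_0 (and _3)
    // are no longer offered, and max pooling uses
    // CUDNN_POOLING_MAX_DETERMINISTIC.
    #include <cstdlib>

    int main() {
      setenv("TF_DETERMINISTIC_OPS", "1", /*overwrite=*/1);
      // setenv("TF_CUDNN_DETERMINISTIC", "1", 1);  // equivalent alternative
      // ... build and run the TensorFlow / XLA:GPU program as usual ...
      return 0;
    }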
@@ -669,7 +669,6 @@ cc_library(
         "//tensorflow/core:stream_executor_no_cuda",
         "//tensorflow/core/util/proto:proto_utils",
         "//tensorflow/stream_executor:device_memory_allocator",
-        "//tensorflow/stream_executor/cuda:cuda_helpers",
         "//tensorflow/stream_executor/gpu:redzone_allocator",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/strings",
@@ -35,8 +35,8 @@ limitations under the License.
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/proto/proto_utils.h"
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
 #include "tensorflow/stream_executor/gpu/redzone_allocator.h"
 
 namespace xla {
@@ -310,6 +310,22 @@ StatusOr<AutotuneResult> GpuConvAlgorithmPicker::PickBestAlgorithm(
   return result_or;
 }
 
+// A helper function to decide whether to enable deterministic functionality.
+bool RequireCuDNNDeterminism() {
+  static bool require_cudnn_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    bool cudnn_deterministic = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
+                                               /*default_val=*/false,
+                                               &cudnn_deterministic));
+    return deterministic_ops || cudnn_deterministic;
+  }();
+  return require_cudnn_determinism;
+}
+
 StatusOr<tensorflow::AutotuneResult>
 GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
     const HloCustomCallInstruction* instr, se::DeviceMemoryAllocator* allocator,
@@ -562,7 +578,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
   }
 
   auto selected_result = filtered_results.begin();
-  if (!se::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireCuDNNDeterminism()) {
     selected_result = absl::c_min_element(
         filtered_results,
         [](const AutotuneResult& lhs, const AutotuneResult& rhs) {
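For context, a condensed sketch of the selection logic this guard controls, using simplified stand-in types rather than the real tensorflow::AutotuneResult proto: when determinism is required the first filtered result is kept as-is, otherwise the fastest result wins.

    #include <algorithm>
    #include <vector>

    struct Result {        // stand-in for AutotuneResult
      int algo_id;
      double run_time_ms;
    };

    // Assumes `filtered` is non-empty, as in the surrounding code.
    int SelectAlgorithm(const std::vector<Result>& filtered,
                        bool require_determinism) {
      auto selected = filtered.begin();
      if (!require_determinism) {
        selected = std::min_element(
            filtered.begin(), filtered.end(),
            [](const Result& lhs, const Result& rhs) {
              return lhs.run_time_ms < rhs.run_time_ms;
            });
      }
      return selected->algo_id;
    }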
@@ -531,7 +531,6 @@ tf_cuda_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor",
         "//tensorflow/core/util/proto:proto_utils",
-        "//tensorflow/stream_executor/cuda:cuda_helpers",
         "//tensorflow/stream_executor/gpu:asm_compiler",
         "//tensorflow/stream_executor/gpu:redzone_allocator",
         "@com_google_absl//absl/algorithm:container",
@@ -24,8 +24,8 @@ limitations under the License.
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/protobuf/autotuning.pb.h"
 #include "tensorflow/core/protobuf/conv_autotuning.pb.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/proto/proto_utils.h"
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
 #include "tensorflow/stream_executor/gpu/asm_compiler.h"
 #include "tensorflow/stream_executor/gpu/redzone_allocator.h"
 
@@ -212,6 +212,22 @@ void LogFusedConvForwardAutotuneResults(
   Logger::GetSingleton()->LogProto(log);
 }
 
+// A helper function to decide whether to enable deterministic functionality.
+bool RequireDeterminism() {
+  static bool require_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    bool cudnn_deterministic = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
+                                               /*default_val=*/false,
+                                               &cudnn_deterministic));
+    return deterministic_ops || cudnn_deterministic;
+  }();
+  return require_determinism;
+}
+
 Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
                               se::dnn::AlgorithmConfig* algo) {
   std::vector<AutotuneResult> filtered_results;
@@ -228,7 +244,7 @@ Status BestCudnnConvAlgorithm(absl::Span<const AutotuneResult> results,
 
   auto selected_result = filtered_results.begin();
   auto selected_result_no_scratch = filtered_results_no_scratch.begin();
-  if (!se::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireDeterminism()) {
     auto compare_run_times = [](const AutotuneResult& lhs,
                                 const AutotuneResult& rhs) {
       return proto_utils::FromDurationProto(lhs.run_time()) <
@@ -343,7 +343,6 @@ cc_library(
         ":cuda_platform_id",
         ":cuda_stream",
         ":cuda_timer",
-        ":cuda_helpers",
         ":cudnn_version",
         ":cudnn_lib",
         "@com_google_absl//absl/strings",
@@ -481,8 +480,7 @@ cc_library(
 # TODO(leary) we likely need to canonicalize/eliminate this.
 cc_library(
     name = "cuda_helpers",
-    srcs = if_cuda_is_configured(["cuda_helpers.cc"]),
-    hdrs = if_cuda_is_configured(["cuda_helpers.h"]),
+    textual_hdrs = if_cuda_is_configured(["cuda_helpers.h"]),
     deps = if_cuda_is_configured([
         "//tensorflow/stream_executor/gpu:gpu_helpers_header",
     ]),
@@ -27,7 +27,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h"
 #include "tensorflow/stream_executor/cuda/cuda_driver.h"
 #include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
 #include "tensorflow/stream_executor/cuda/cuda_platform_id.h"
 #include "tensorflow/stream_executor/cuda/cuda_stream.h"
 #include "tensorflow/stream_executor/cuda/cuda_timer.h"
@@ -631,6 +630,22 @@ bool BatchnormSpatialPersistentEnabled() {
   return is_enabled;
 }
 
+// A helper function to decide whether to enable deterministic functionality.
+bool RequireDeterminism() {
+  static bool require_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    bool cudnn_deterministic = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
+                                               /*default_val=*/false,
+                                               &cudnn_deterministic));
+    return deterministic_ops || cudnn_deterministic;
+  }();
+  return require_determinism;
+}
+
 std::tuple<int, int> GetCcMajorMinor(Stream* stream) {
   int cc_major, cc_minor;
   stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major,
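This hunk adds the third copy of the same helper. The underlying idiom is a function-local static initialized by an immediately-invoked lambda, so the two environment variables are read once per process and the cached answer is reused on every later call. A minimal self-contained sketch of the idiom with hypothetical names, using plain std::getenv in place of tensorflow::ReadBoolFromEnvVar:

    #include <cstdlib>
    #include <cstring>

    // Treats "1" or "true" as enabled; unset or anything else as disabled.
    bool EnvFlagEnabled(const char* name) {
      const char* value = std::getenv(name);
      return value != nullptr &&
             (std::strcmp(value, "1") == 0 || std::strcmp(value, "true") == 0);
    }

    bool DeterminismRequested() {
      static bool requested = [] {  // initialized once, thread-safe since C++11
        return EnvFlagEnabled("TF_DETERMINISTIC_OPS") ||
               EnvFlagEnabled("TF_CUDNN_DETERMINISTIC");
      }();
      return requested;
    }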
@@ -729,10 +744,9 @@ class CudnnPoolingDescriptor {
     std::transform(shape64.cbegin(), shape64.cend(), shape.begin(),
                    &CheckedNarrowing<int64, int>);
     bool propagate_nans = pooling_descriptor.propagate_nans();
-    const auto cudnn_max_pooling_mode =
-        stream_executor::cuda::RequireCuDNNDeterminism()
-            ? CUDNN_POOLING_MAX_DETERMINISTIC
-            : CUDNN_POOLING_MAX;
+    const auto cudnn_max_pooling_mode = RequireDeterminism()
+                                            ? CUDNN_POOLING_MAX_DETERMINISTIC
+                                            : CUDNN_POOLING_MAX;
     CHECK_CUDNN_OK(cudnnSetPoolingNdDescriptor(
         handle_.get(),
         (pooling_descriptor.mode() == dnn::PoolingMode::kMaximum
@@ -3300,7 +3314,7 @@ bool CudnnSupport::GetConvolveBackwardDataAlgorithms(
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED);
   }
-  if (!stream_executor::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireDeterminism()) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_0);
   }
 
@@ -3336,7 +3350,7 @@ bool CudnnSupport::GetConvolveBackwardFilterAlgorithms(
   if (CudnnEnvVar<WinogradNonfused>::IsEnabled() && with_winograd_nonfused) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED);
   }
-  if (!stream_executor::cuda::RequireCuDNNDeterminism()) {
+  if (!RequireDeterminism()) {
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0);
     algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3);
   }
@@ -1,39 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/stream_executor/cuda/cuda_helpers.h"
-
-#include "tensorflow/core/util/env_var.h"
-
-namespace stream_executor {
-namespace cuda {
-
-bool RequireCuDNNDeterminism() {
-  static bool require_cudnn_determinism = [] {
-    bool deterministic_ops = false;
-    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
-                                               /*default_val=*/false,
-                                               &deterministic_ops));
-    bool cudnn_deterministic = false;
-    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_CUDNN_DETERMINISTIC",
-                                               /*default_val=*/false,
-                                               &cudnn_deterministic));
-    return deterministic_ops || cudnn_deterministic;
-  }();
-  return require_cudnn_determinism;
-}
-
-}  // namespace cuda
-}  // namespace stream_executor
@@ -22,14 +22,4 @@ limitations under the License.
 
 #include "tensorflow/stream_executor/gpu/gpu_helpers.h"
 
-namespace stream_executor {
-namespace cuda {
-
-// A helper function to decide whether to enable deterministic cuDNN
-// functionality.
-bool RequireCuDNNDeterminism();
-
-}  // namespace cuda
-}  // namespace stream_executor
-
 #endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_HELPERS_H_