From 8b7a3db0b6e09415b5640be4986fb4d7c6e5209a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 19 Feb 2020 17:20:19 -0800 Subject: [PATCH] [XLA] Respect TF_DETERMINISTIC_OPS environment variable for reductions PiperOrigin-RevId: 296094275 Change-Id: Iadcbf33d5d6432413c86d4d176865980de252eeb --- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/amdgpu_compiler.cc | 1 + .../compiler/xla/service/gpu/nvptx_compiler.cc | 14 +++++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index c812272829a..28e33b2a17e 100755 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -1285,6 +1285,7 @@ cc_library( ":reduction_dimension_grouper", ":reduction_layout_normalizer", ":target_constants", + ":tree_reduction_rewriter", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/service:algebraic_simplifier", "//tensorflow/compiler/xla/service:hlo", diff --git a/tensorflow/compiler/xla/service/gpu/amdgpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/amdgpu_compiler.cc index 0e2e27ee9a3..97013804271 100644 --- a/tensorflow/compiler/xla/service/gpu/amdgpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/amdgpu_compiler.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h" #include "tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h" #include "tensorflow/compiler/xla/service/gpu/target_constants.h" +#include "tensorflow/compiler/xla/service/gpu/tree_reduction_rewriter.h" #include "tensorflow/compiler/xla/service/hlo_constant_folding.h" #include "tensorflow/compiler/xla/service/hlo_cse.h" #include "tensorflow/compiler/xla/service/hlo_pass_fix.h" diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index f61ccd77c86..a1a901f0b94 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -55,6 +55,7 @@ limitations under the License. #include "tensorflow/core/platform/cuda_libdevice_path.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/util/env_var.h" #include "tensorflow/stream_executor/cuda/cuda_diagnostics.h" #include "tensorflow/stream_executor/gpu/asm_compiler.h" @@ -151,6 +152,16 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization( return Status::OK(); } +// TODO(cheshire): Duplication with gpu_conv_algorithm picker, figure out a +// right way to share this. 
+static bool RequireDeterminism() { + bool deterministic_ops = false; + TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS", + /*default_val=*/false, + &deterministic_ops)); + return deterministic_ops; +} + Status NVPTXCompiler::OptimizeHloPostLayoutAssignment( HloModule* hlo_module, se::StreamExecutor* stream_exec, se::DeviceMemoryAllocator* device_allocator) { @@ -172,7 +183,8 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment( options.set_is_layout_sensitive(true); pipeline.AddPass<HloPassFix<AlgebraicSimplifier>>(options); - if (hlo_module->config().debug_options().xla_gpu_deterministic_reductions()) { + if (RequireDeterminism() || + hlo_module->config().debug_options().xla_gpu_deterministic_reductions()) { pipeline.AddPass<HloPassFix<GpuTreeReductionRewriter>>(); }