From bb2a1d3ee8d15ca950864ef4abd556fc8de1f456 Mon Sep 17 00:00:00 2001
From: George Karpenkov <cheshire@google.com>
Date: Wed, 26 Jun 2019 17:12:27 -0700
Subject: [PATCH] Provide a flag TF_DISABLE_RZ_CHECK to disable redzone check
 during TF convolution autotuning

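TF_DISABLE_RZ_CHECK is read as an environment variable. It must be set to
exactly "1" to disable redzone checking; if it is unset or has any other
value, redzone checking stays enabled.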

PiperOrigin-RevId: 255295979
---
 tensorflow/core/kernels/conv_ops.cc | 42 ++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 0ea367d4250..56162cae1d7 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -73,6 +73,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
 namespace {
+
 template <typename Device, typename T>
 struct LaunchGeneric {
   void operator()(OpKernelContext* ctx, const Tensor& input,
@@ -575,6 +576,11 @@ template struct LaunchConv2DOp<CPUDevice, float>;
 template struct LaunchConv2DOp<CPUDevice, double>;
 
 #if GOOGLE_CUDA
+static bool RedzoneCheckDisabled() {
+  const char* disable_rz_str = std::getenv("TF_DISABLE_RZ_CHECK");
+  return disable_rz_str != nullptr && std::strcmp(disable_rz_str, "1") == 0;
+}
+
 int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
                            int64 default_value_in_bytes) {
   const char* workspace_limit_in_mb_str = getenv(envvar_in_mb.c_str());
@@ -997,19 +1003,24 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
                                             se::cuda::PtxCompilationOptions());
 
     se::DeviceMemory<T> output_tensor;
-    auto output_rz_or = rz_allocator.AllocateBytes(stream, output_ptr.size());
-    if (!output_rz_or.ok()) {
-      static std::once_flag rz_allocation_failure_logged;
-      std::call_once(rz_allocation_failure_logged, []() {
-        LOG(WARNING)
-            << "Failed to allocate memory for convolution redzone "
-            << "checking; skipping this check. This is benign and only "
-            << "means that we won't check cudnn for out-of-bounds reads "
-            << "and writes. This message will only be printed once.";
-      });
-      output_tensor = output_ptr;
+
+    if (!RedzoneCheckDisabled()) {
+      auto output_rz_or = rz_allocator.AllocateBytes(stream, output_ptr.size());
+      if (!output_rz_or.ok()) {
+        static std::once_flag rz_allocation_failure_logged;
+        std::call_once(rz_allocation_failure_logged, []() {
+          LOG(WARNING)
+              << "Failed to allocate memory for convolution redzone "
+              << "checking; skipping this check. This is benign and only "
+              << "means that we won't check cudnn for out-of-bounds reads "
+              << "and writes. This message will only be printed once.";
+        });
+        output_tensor = output_ptr;
+      } else {
+        output_tensor = se::DeviceMemory<T>(output_rz_or.ValueOrDie());
+      }
     } else {
-      output_tensor = se::DeviceMemory<T>(output_rz_or.ValueOrDie());
+      output_tensor = output_ptr;
     }
 
     std::vector<tensorflow::AutotuneResult> results;
@@ -1019,13 +1030,18 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
       se::cuda::RedzoneAllocator rz_scratch_allocator(
           stream->parent()->device_ordinal(), &tf_allocator_adapter,
           se::cuda::PtxCompilationOptions());
+      DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+      se::ScratchAllocator* allocator_used =
+          !RedzoneCheckDisabled()
+              ? static_cast<se::ScratchAllocator*>(&rz_scratch_allocator)
+              : static_cast<se::ScratchAllocator*>(&scratch_allocator);
 
       ProfileResult profile_result;
       bool cudnn_launch_status =
           stream
               ->ThenConvolveWithAlgorithm(
                   input_desc, input_ptr, filter_desc, filter_ptr, conv_desc,
-                  output_desc, &output_tensor, &rz_scratch_allocator,
+                  output_desc, &output_tensor, allocator_used,
                   AlgorithmConfig(profile_algorithm), &profile_result)
               .ok();
       if (cudnn_launch_status && profile_result.is_valid()) {