From 56d7ad1ed931d0a63d7cc9baa8e4110aa68c8b60 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 23 Dec 2020 16:12:07 -0800
Subject: [PATCH] Be friendlier to older ptxas versions

This is pertinent because RTX 3090 is now available which runs CC 8.6 while TF
is still building against CUDA 11, and the ptxas that comes with it only
supports CC 8.0 or older.

This CL:

 1. Makes the warning message less scary.
 2. Introduces some caching to make the failure case (which will be more
    frequent for some users) faster.

PiperOrigin-RevId: 348860909
Change-Id: I6d2215adee2ed9db8db2dffac6ae2fb6c8bfd74e
---
 tensorflow/stream_executor/gpu/BUILD          |  2 +-
 .../stream_executor/gpu/asm_compiler.cc       | 24 +++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/tensorflow/stream_executor/gpu/BUILD b/tensorflow/stream_executor/gpu/BUILD
index 08a76864d53..63ade0d2cce 100644
--- a/tensorflow/stream_executor/gpu/BUILD
+++ b/tensorflow/stream_executor/gpu/BUILD
@@ -251,7 +251,7 @@ cc_library(
         "//tensorflow/stream_executor/cuda:cuda_driver",
         "//tensorflow/stream_executor/cuda:ptxas_wrapper",
         "//tensorflow/stream_executor/cuda:fatbinary_wrapper",
-    ]),
+    ]) + ["@com_google_absl//absl/container:flat_hash_set"],
 )
 
 cc_library(
diff --git a/tensorflow/stream_executor/gpu/asm_compiler.cc b/tensorflow/stream_executor/gpu/asm_compiler.cc
index d08155703b6..d60b1531e91 100644
--- a/tensorflow/stream_executor/gpu/asm_compiler.cc
+++ b/tensorflow/stream_executor/gpu/asm_compiler.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/gpu/asm_compiler.h"
 
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/synchronization/mutex.h"
 #include "tensorflow/core/lib/core/errors.h"
@@ -176,6 +177,24 @@ static std::string findCudaExecutable(const std::string binary_name,
   return binary_path;
 }
 
+static void LogPtxasTooOld(const std::string& ptxas_path, int cc_major,
+                           int cc_minor) {  // Emits the fallback warning at most once per distinct (ptxas_path, cc_major, cc_minor).
+  using AlreadyLoggedSetTy =
+      absl::flat_hash_set<std::tuple<std::string, int, int>>;

+  static absl::Mutex* mutex = new absl::Mutex;  // Allocated on first call and never deleted in this function.
+  static AlreadyLoggedSetTy* already_logged = new AlreadyLoggedSetTy;  // Keys already warned about; only accessed below, after the lock is taken.

+  absl::MutexLock lock(mutex);  // Held for the rest of the function, covering both the set update and the logging.

+  if (already_logged->insert({ptxas_path, cc_major, cc_minor}).second) {  // .second is true only when this key was newly inserted.
+    LOG(WARNING) << "Falling back to the CUDA driver for PTX compilation; "
+                    "ptxas does not support CC "
+                 << cc_major << "." << cc_minor;
+    LOG(WARNING) << "Used ptxas at " << ptxas_path;  // Second line records which ptxas binary was rejected.
+  }
+}
+
 port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
                                                  const char* ptx_contents,
                                                  GpuAsmOpts options) {
@@ -241,10 +260,7 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
     if (absl::StartsWith(stderr_output, "ptxas fatal   : Value '") &&
         absl::StrContains(stderr_output,
                           "is not defined for option 'gpu-name'")) {
-      LOG(WARNING) << "Your CUDA software stack is old. We fallback to the"
-                   << " NVIDIA driver for some compilation. Update your CUDA"
-                   << " version to get the best performance."
-                   << " The ptxas error was: " << stderr_output;
+      LogPtxasTooOld(ptxas_path, cc_major, cc_minor);
       return tensorflow::errors::Unimplemented(
           ptxas_path, " ptxas too old. Falling back to the driver to compile.");
     }