Be friendlier to older ptxas versions

This is pertinent because the RTX 3090 is now available, which has CC 8.6, while TF is still building against CUDA 11, and the ptxas that comes with it only supports CC 8.0 or older. This CL: 1. Makes the warning message less scary. 2. Introduces some caching to make the failure case (which will be more frequent for some users) faster. PiperOrigin-RevId: 348860909 Change-Id: I6d2215adee2ed9db8db2dffac6ae2fb6c8bfd74e
2020-12-23 16:12:07 -08:00 · 2020-12-23 16:12:07 -08:00 · 56d7ad1ed9
commit 56d7ad1ed9
parent 50ea0bcb08
2 changed files with 21 additions and 5 deletions
--- a/tensorflow/stream_executor/gpu/BUILD
+++ b/tensorflow/stream_executor/gpu/BUILD
@ -251,7 +251,7 @@ cc_library(
        "//tensorflow/stream_executor/cuda:cuda_driver",
        "//tensorflow/stream_executor/cuda:ptxas_wrapper",
        "//tensorflow/stream_executor/cuda:fatbinary_wrapper",
-    ]),
+    ]) + ["@com_google_absl//absl/container:flat_hash_set"],
 )

 cc_library(
--- a/tensorflow/stream_executor/gpu/asm_compiler.cc
+++ b/tensorflow/stream_executor/gpu/asm_compiler.cc
@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/gpu/asm_compiler.h"

 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
 #include "absl/synchronization/mutex.h"
 #include "tensorflow/core/lib/core/errors.h"
@ -176,6 +177,24 @@ static std::string findCudaExecutable(const std::string binary_name,
  return binary_path;
 }

+static void LogPtxasTooOld(const std::string& ptxas_path, int cc_major,
+                           int cc_minor) {  // Warns at most once per distinct (ptxas_path, cc_major, cc_minor).
+  using AlreadyLoggedSetTy =
+      absl::flat_hash_set<std::tuple<std::string, int, int>>;
+
+  static absl::Mutex* mutex = new absl::Mutex;  // Heap-allocated; never deleted by this function.
+  static AlreadyLoggedSetTy* already_logged = new AlreadyLoggedSetTy;  // Triples already warned about.
+
+  absl::MutexLock lock(mutex);  // Held across both the set insert and the logging below.
+
+  if (already_logged->insert({ptxas_path, cc_major, cc_minor}).second) {  // .second is true only when the triple was newly inserted.
+    LOG(WARNING) << "Falling back to the CUDA driver for PTX compilation; "
+                    "ptxas does not support CC "
+                 << cc_major << "." << cc_minor;
+    LOG(WARNING) << "Used ptxas at " << ptxas_path;
+  }
+}
+
 port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
                                                 const char* ptx_contents,
                                                 GpuAsmOpts options) {
@ -241,10 +260,7 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
    if (absl::StartsWith(stderr_output, "ptxas fatal   : Value '") &&
        absl::StrContains(stderr_output,
                          "is not defined for option 'gpu-name'")) {
-      LOG(WARNING) << "Your CUDA software stack is old. We fallback to the"
-                   << " NVIDIA driver for some compilation. Update your CUDA"
-                   << " version to get the best performance."
-                   << " The ptxas error was: " << stderr_output;
+      LogPtxasTooOld(ptxas_path, cc_major, cc_minor);
      return tensorflow::errors::Unimplemented(
          ptxas_path, " ptxas too old. Falling back to the driver to compile.");
    }