From 56d7ad1ed931d0a63d7cc9baa8e4110aa68c8b60 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 23 Dec 2020 16:12:07 -0800 Subject: [PATCH] Be friendlier to older ptxas versions This is pertinent because RTX 3090 is now available which runs CC 8.6 while TF is still building against CUDA 11, and the ptxas that comes with it only supports CC 8.0 or older. This CL: 1. Makes the warning message less scary. 2. Introduces some caching to make the failure case (which will be more frequent for some users) faster. PiperOrigin-RevId: 348860909 Change-Id: I6d2215adee2ed9db8db2dffac6ae2fb6c8bfd74e --- tensorflow/stream_executor/gpu/BUILD | 2 +- .../stream_executor/gpu/asm_compiler.cc | 24 +++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/stream_executor/gpu/BUILD b/tensorflow/stream_executor/gpu/BUILD index 08a76864d53..63ade0d2cce 100644 --- a/tensorflow/stream_executor/gpu/BUILD +++ b/tensorflow/stream_executor/gpu/BUILD @@ -251,7 +251,7 @@ cc_library( "//tensorflow/stream_executor/cuda:cuda_driver", "//tensorflow/stream_executor/cuda:ptxas_wrapper", "//tensorflow/stream_executor/cuda:fatbinary_wrapper", - ]), + ]) + ["@com_google_absl//absl/container:flat_hash_set"], ) cc_library( diff --git a/tensorflow/stream_executor/gpu/asm_compiler.cc b/tensorflow/stream_executor/gpu/asm_compiler.cc index d08155703b6..d60b1531e91 100644 --- a/tensorflow/stream_executor/gpu/asm_compiler.cc +++ b/tensorflow/stream_executor/gpu/asm_compiler.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/gpu/asm_compiler.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_format.h" #include "absl/synchronization/mutex.h" #include "tensorflow/core/lib/core/errors.h" @@ -176,6 +177,24 @@ static std::string findCudaExecutable(const std::string binary_name, return binary_path; } +static void LogPtxasTooOld(const std::string& ptxas_path, int cc_major, + int cc_minor) { + using AlreadyLoggedSetTy = + absl::flat_hash_set<std::tuple<std::string, int, int>>; + + static absl::Mutex* mutex = new absl::Mutex; + static AlreadyLoggedSetTy* already_logged = new AlreadyLoggedSetTy; + + absl::MutexLock lock(mutex); + + if (already_logged->insert({ptxas_path, cc_major, cc_minor}).second) { + LOG(WARNING) << "Falling back to the CUDA driver for PTX compilation; " + "ptxas does not support CC " + << cc_major << "." << cc_minor; + LOG(WARNING) << "Used ptxas at " << ptxas_path; + } +} + port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor, const char* ptx_contents, GpuAsmOpts options) { @@ -241,10 +260,7 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor, if (absl::StartsWith(stderr_output, "ptxas fatal : Value '") && absl::StrContains(stderr_output, "is not defined for option 'gpu-name'")) { - LOG(WARNING) << "Your CUDA software stack is old. We fallback to the" - << " NVIDIA driver for some compilation. Update your CUDA" - << " version to get the best performance." - << " The ptxas error was: " << stderr_output; + LogPtxasTooOld(ptxas_path, cc_major, cc_minor); return tensorflow::errors::Unimplemented( ptxas_path, " ptxas too old. Falling back to the driver to compile."); }