Merge pull request #43888 from nouiz:upstream_old_ptxas

PiperOrigin-RevId: 337531954 Change-Id: I096cf095536bc8c819ab61f0020fea20eca341d6
2020-10-16 10:42:20 -07:00 · 2020-10-16 10:42:20 -07:00 · de2c020a2a
commit de2c020a2a
parent cd4bdebd30 7544886e58
4 changed files with 70 additions and 40 deletions
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@ -198,6 +198,42 @@ absl::optional<bool> CanShareBufferHint(const HloInstruction* user,
  return absl::nullopt;
 }

+// Try to load ptx from files defined in the FLAGS. If successful, return true.
+bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
+  // If the xla_gpu_ptx_file options is set, be explicit when a file is used
+  // and warn when a file is not used to ease catching typo in filename.
+  std::string prefix = xla::FilenameFor(*module, "", *ptx);
+  std::string matched_filename;
+  for (const string& full_filename :
+       module->config().debug_options().xla_gpu_ptx_file()) {
+    // To ease comparing many PTX versions, accept different suffixes then
+    // the original filename.
+    auto filename = tensorflow::io::Basename(full_filename);
+    if (absl::StartsWith(filename, prefix)) {
+      matched_filename = full_filename;
+      VLOG(0) << "RunBackend() - Will load PTX from file: " << full_filename;
+      break;
+    }
+  }
+  if (module->config().debug_options().xla_gpu_ptx_file().size() > 0 &&
+      matched_filename.empty()) {
+    VLOG(0) << "RunBackend() - For module with prefix '" << prefix
+            << "', we did not found a PTX file to load.";
+  }
+
+  if (!matched_filename.empty()) {
+    std::ifstream ifs(matched_filename, std::ifstream::in);
+    *ptx = std::string(std::istreambuf_iterator<char>(ifs),
+                       std::istreambuf_iterator<char>());
+    CHECK(!ptx->empty()) << "Empty or non existing PTX file: "
+                         << matched_filename;
+    return true;
+  }
+  return false;
+}
+
+}  // namespace
+
 // Prints a warning if the ptx->sass JIT in the driver has known bugs.
 //
 // Using such a driver only a problem if we fail to use ptxas to compile our ptx
@ -238,42 +274,6 @@ void WarnIfBadDriverJITVersion() {
  });
 }

-// Try to load ptx from files defined in the FLAGS. If successful, return true.
-bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
-  // If the xla_gpu_ptx_file options is set, be explicit when a file is used
-  // and warn when a file is not used to ease catching typo in filename.
-  std::string prefix = xla::FilenameFor(*module, "", *ptx);
-  std::string matched_filename;
-  for (const string& full_filename :
-       module->config().debug_options().xla_gpu_ptx_file()) {
-    // To ease comparing many PTX versions, accept different suffixes then
-    // the original filename.
-    auto filename = tensorflow::io::Basename(full_filename);
-    if (absl::StartsWith(filename, prefix)) {
-      matched_filename = full_filename;
-      VLOG(0) << "RunBackend() - Will load PTX from file: " << full_filename;
-      break;
-    }
-  }
-  if (module->config().debug_options().xla_gpu_ptx_file().size() > 0 &&
-      matched_filename.empty()) {
-    VLOG(0) << "RunBackend() - For module with prefix '" << prefix
-            << "', we did not found a PTX file to load.";
-  }
-
-  if (!matched_filename.empty()) {
-    std::ifstream ifs(matched_filename, std::ifstream::in);
-    *ptx = std::string(std::istreambuf_iterator<char>(ifs),
-                       std::istreambuf_iterator<char>());
-    CHECK(!ptx->empty()) << "Empty or non existing PTX file: "
-                         << matched_filename;
-    return true;
-  }
-  return false;
-}
-
-}  // namespace
-
 NVPTXCompiler::NVPTXCompiler()
    : GpuCompiler(stream_executor::cuda::kCudaPlatformId, nvptx::kTargetTriple,
                  nvptx::kDataLayout) {}
@ -415,7 +415,9 @@ std::vector<uint8> NVPTXCompiler::CompileGpuAsmOrGetCachedResult(
                  "using $PATH.",
                  hlo_module_config);
            }
-          } else {
+          } else if (maybe_cubin.status().code() !=
+                     tensorflow::error::Code::UNIMPLEMENTED) {
+            // If unimplemented is returned, we fallback to the driver.
            LOG(FATAL) << "ptxas returned an error during compilation of ptx "
                          "to sass: '"
                       << maybe_cubin.status() << "'  "
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@ -30,6 +30,8 @@ limitations under the License.
 namespace xla {
 namespace gpu {

+void WarnIfBadDriverJITVersion();
+
 // NVPTXCompiler generates efficient GPU executables for NVPTX target.
 class NVPTXCompiler : public GpuCompiler {
 public:
--- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc
+++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc
@ -562,9 +562,20 @@ StatusOr<std::unique_ptr<Executable>> MlirCompilerImpl::RunBackend(
      auto ptx, xla::gpu::nvptx::CompileToPtx(llvmModule.get(),
                                              GetGpuVersion(stream_exec),
                                              config, GetLibdeviceDir(config)));
-  TF_ASSIGN_OR_RETURN(
-      auto cubin, se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
-                                    gpu::PtxOptsFromConfig(config)));
+  // Allow to fallback to the driver compilation when ptxas isn't able to
+  // compile.
+  StatusOr<std::vector<uint8>> maybe_cubin =
+      se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
+                        gpu::PtxOptsFromConfig(config));
+  std::vector<uint8> cubin;
+  if (maybe_cubin.ok()) {
+    cubin = std::move(maybe_cubin).ValueOrDie();
+  } else if (maybe_cubin.status().code() ==
+             tensorflow::error::Code::UNIMPLEMENTED) {
+    xla::gpu::WarnIfBadDriverJITVersion();
+  } else {
+    return maybe_cubin.status();
+  }

  auto thunk_schedule = absl::make_unique<ThunkSchedule>(
      std::make_unique<gpu::ThunkSequence>(std::move(thunk_sequence)),
--- a/tensorflow/stream_executor/gpu/asm_compiler.cc
+++ b/tensorflow/stream_executor/gpu/asm_compiler.cc
@ -225,6 +225,21 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
  int exit_status = ptxas_info_dumper.Communicate(
      /*stdin_input=*/nullptr, /*stdout_output=*/nullptr, &stderr_output);
  if (exit_status != 0) {
+    //  It happens when the ptxas installed is too old for the current GPU.
+    //  Example error message associated with this error code:
+    //      ptxas fatal   : Value 'sm_80' is not defined for option 'gpu-name'
+    // In that case, fallback to the driver for compilation
+    if (absl::StartsWith(stderr_output, "ptxas fatal   : Value '") &&
+        absl::StrContains(stderr_output,
+                          "is not defined for option 'gpu-name'")) {
+      LOG(WARNING) << "Your CUDA software stack is old. We fallback to the"
+                   << " NVIDIA driver for some compilation. Update your CUDA"
+                   << " version to get the best performance."
+                   << " The ptxas error was: " << stderr_output;
+      return tensorflow::errors::Unimplemented(
+          ptxas_path, " ptxas too old. Falling back to the driver to compile.");
+    }
+
    return port::InternalError(
        absl::StrFormat("ptxas exited with non-zero error code %d, output: %s",
                        exit_status, stderr_output));