From 052243a12c7cd66be621747df4f73b52743d4bb2 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Fri, 29 May 2020 06:32:24 -0700 Subject: [PATCH] Improve logging when ptxas cannot be found. PiperOrigin-RevId: 313770396 Change-Id: I9dbbd70cb567fd173219c6744e3ca879fedeafc6 --- .../compiler/xla/debug_options_flags.cc | 3 +-- .../xla/service/gpu/nvptx_compiler.cc | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index cad73b593a2..958629c5fa6 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -71,8 +71,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_force_host_platform_device_count(1); opts.set_xla_gpu_deterministic_reductions(false); opts.set_xla_cpu_enable_xprof_traceme(true); - // TODO(b/155295372): disable ptxas fallback by default. - opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(true); + opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false); return opts; } diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index b0b214832ea..eefa4661d37 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -385,6 +385,19 @@ std::vector NVPTXCompiler::CompileGpuAsmOrGetCachedResult( } else { if (maybe_cubin.status().code() == tensorflow::error::Code::NOT_FOUND) { + if (!hlo_module_config.debug_options() + .xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found()) { + PrintCantFindCudaMessage( + "Can't find ptxas binary in ${CUDA_DIR}/bin. Custom ptxas " + "location can be specified using $PATH.", + hlo_module_config); + LOG(FATAL) + << "Can't find ptxas binary. 
You can pass the flag " + "--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found " + "to use the GPU driver for compiling ptx instead. However " + "this option is discouraged and can lead to increased " + "memory consumption and other subtle runtime issues."; + } // Missing ptxas is expected in some environments where CUDA SDK // binaries are not available. We don't want to spam logs with // identical warnings in this case. @@ -402,14 +415,6 @@ std::vector NVPTXCompiler::CompileGpuAsmOrGetCachedResult( "using $PATH.", hlo_module_config); } - CHECK(hlo_module_config.debug_options() - .xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found()) - << "There was an error when trying to compile ptx into sass " - "code. If you want to try falling back to the GPU driver to " - "jit compile ptx, you can use the flag " - "--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found." - " Use at your own risk though, it has known drawbacks like " - "increased memory consumption."; } else { LOG(FATAL) << "ptxas returned an error during compilation of ptx " "to sass: '"