Improve logging when ptxas cannot be found.

PiperOrigin-RevId: 313770396
Change-Id: I9dbbd70cb567fd173219c6744e3ca879fedeafc6
This commit is contained in:
Thomas Joerg 2020-05-29 06:32:24 -07:00 committed by TensorFlower Gardener
parent e0dc15cec6
commit 052243a12c
2 changed files with 14 additions and 10 deletions

View File

@ -71,8 +71,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
opts.set_xla_force_host_platform_device_count(1); opts.set_xla_force_host_platform_device_count(1);
opts.set_xla_gpu_deterministic_reductions(false); opts.set_xla_gpu_deterministic_reductions(false);
opts.set_xla_cpu_enable_xprof_traceme(true); opts.set_xla_cpu_enable_xprof_traceme(true);
// TODO(b/155295372): disable ptxas fallback by default. opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(true);
return opts; return opts;
} }

View File

@ -385,6 +385,19 @@ std::vector<uint8> NVPTXCompiler::CompileGpuAsmOrGetCachedResult(
} else { } else {
if (maybe_cubin.status().code() == if (maybe_cubin.status().code() ==
tensorflow::error::Code::NOT_FOUND) { tensorflow::error::Code::NOT_FOUND) {
if (!hlo_module_config.debug_options()
.xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found()) {
PrintCantFindCudaMessage(
"Can't find ptxas binary in ${CUDA_DIR}/bin. Custom ptxas "
"location can be specified using $PATH.",
hlo_module_config);
LOG(FATAL)
<< "Can't find ptxas binary. You can pass the flag "
"--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found "
"to use the GPU driver for compiling ptx instead. However "
"this option is discouraged and can lead to increased "
"memory consumption and other subtle runtime issues.";
}
// Missing ptxas is expected in some environments where CUDA SDK // Missing ptxas is expected in some environments where CUDA SDK
// binaries are not available. We don't want to spam logs with // binaries are not available. We don't want to spam logs with
// identical warnings in this case. // identical warnings in this case.
@ -402,14 +415,6 @@ std::vector<uint8> NVPTXCompiler::CompileGpuAsmOrGetCachedResult(
"using $PATH.", "using $PATH.",
hlo_module_config); hlo_module_config);
} }
CHECK(hlo_module_config.debug_options()
.xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found())
<< "There was an error when trying to compile ptx into sass "
"code. If you want to try falling back to the GPU driver to "
"jit compile ptx, you can use the flag "
"--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found."
" Use at your own risk though, it has known drawbacks like "
"increased memory consumption.";
} else { } else {
LOG(FATAL) << "ptxas returned an error during compilation of ptx " LOG(FATAL) << "ptxas returned an error during compilation of ptx "
"to sass: '" "to sass: '"