Merge pull request #43888 from nouiz:upstream_old_ptxas

PiperOrigin-RevId: 337531954
Change-Id: I096cf095536bc8c819ab61f0020fea20eca341d6
TensorFlower Gardener 2020-10-16 10:42:20 -07:00
commit de2c020a2a
4 changed files with 70 additions and 40 deletions


@@ -198,6 +198,42 @@ absl::optional<bool> CanShareBufferHint(const HloInstruction* user,
return absl::nullopt;
}
// Try to load ptx from files defined in the FLAGS. If successful, return true.
bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
// If the xla_gpu_ptx_file option is set, be explicit when a file is used
// and warn when one is not, to ease catching typos in filenames.
std::string prefix = xla::FilenameFor(*module, "", *ptx);
std::string matched_filename;
for (const string& full_filename :
module->config().debug_options().xla_gpu_ptx_file()) {
// To ease comparing many PTX versions, accept suffixes different from
// the original filename.
auto filename = tensorflow::io::Basename(full_filename);
if (absl::StartsWith(filename, prefix)) {
matched_filename = full_filename;
VLOG(0) << "RunBackend() - Will load PTX from file: " << full_filename;
break;
}
}
if (module->config().debug_options().xla_gpu_ptx_file().size() > 0 &&
matched_filename.empty()) {
VLOG(0) << "RunBackend() - For module with prefix '" << prefix
<< "', we did not found a PTX file to load.";
}
if (!matched_filename.empty()) {
std::ifstream ifs(matched_filename, std::ifstream::in);
*ptx = std::string(std::istreambuf_iterator<char>(ifs),
std::istreambuf_iterator<char>());
CHECK(!ptx->empty()) << "Empty or non-existent PTX file: "
<< matched_filename;
return true;
}
return false;
}
} // namespace
// Prints a warning if the ptx->sass JIT in the driver has known bugs.
//
// Using such a driver is only a problem if we fail to use ptxas to compile our ptx
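
The helper added above keys the match on the module's filename prefix and slurps the whole file through std::istreambuf_iterator. A minimal standalone sketch of the same pattern, with hypothetical names and no XLA dependencies:

#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Hypothetical stand-in for MaybeLoadPtxFromFile: returns true and fills
// *contents when some candidate path's basename starts with `prefix`.
bool LoadFirstMatchingFile(const std::string& prefix,
                           const std::vector<std::string>& candidate_paths,
                           std::string* contents) {
  for (const std::string& path : candidate_paths) {
    // Match on the basename only, so the override file can live anywhere.
    const auto slash = path.find_last_of('/');
    const std::string basename =
        slash == std::string::npos ? path : path.substr(slash + 1);
    if (basename.rfind(prefix, 0) != 0) continue;  // no prefix match
    std::ifstream ifs(path);
    if (!ifs) return false;
    // Slurp the whole file, mirroring the istreambuf_iterator idiom above.
    contents->assign(std::istreambuf_iterator<char>(ifs),
                     std::istreambuf_iterator<char>());
    return !contents->empty();
  }
  return false;
}

In XLA itself the candidate list comes from the xla_gpu_ptx_file debug option, typically passed through the XLA_FLAGS environment variable.
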
@@ -238,42 +274,6 @@ void WarnIfBadDriverJITVersion() {
});
}
// Try to load ptx from files defined in the FLAGS. If successful, return true.
bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
// If the xla_gpu_ptx_file option is set, be explicit when a file is used
// and warn when one is not, to ease catching typos in filenames.
std::string prefix = xla::FilenameFor(*module, "", *ptx);
std::string matched_filename;
for (const string& full_filename :
module->config().debug_options().xla_gpu_ptx_file()) {
// To ease comparing many PTX versions, accept suffixes different from
// the original filename.
auto filename = tensorflow::io::Basename(full_filename);
if (absl::StartsWith(filename, prefix)) {
matched_filename = full_filename;
VLOG(0) << "RunBackend() - Will load PTX from file: " << full_filename;
break;
}
}
if (module->config().debug_options().xla_gpu_ptx_file().size() > 0 &&
matched_filename.empty()) {
VLOG(0) << "RunBackend() - For module with prefix '" << prefix
<< "', we did not found a PTX file to load.";
}
if (!matched_filename.empty()) {
std::ifstream ifs(matched_filename, std::ifstream::in);
*ptx = std::string(std::istreambuf_iterator<char>(ifs),
std::istreambuf_iterator<char>());
CHECK(!ptx->empty()) << "Empty or non-existent PTX file: "
<< matched_filename;
return true;
}
return false;
}
} // namespace
NVPTXCompiler::NVPTXCompiler()
: GpuCompiler(stream_executor::cuda::kCudaPlatformId, nvptx::kTargetTriple,
nvptx::kDataLayout) {}
@@ -415,7 +415,9 @@ std::vector<uint8> NVPTXCompiler::CompileGpuAsmOrGetCachedResult(
"using $PATH.",
hlo_module_config);
}
} else {
} else if (maybe_cubin.status().code() !=
tensorflow::error::Code::UNIMPLEMENTED) {
// If unimplemented is returned, we fall back to the driver.
LOG(FATAL) << "ptxas returned an error during compilation of ptx "
"to sass: '"
<< maybe_cubin.status() << "' "
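
This change narrows the fatal path: only ptxas failures other than UNIMPLEMENTED abort compilation. A sketch of the resulting three-way decision, using absl::Status in place of TensorFlow's Status types; CompileWithPtxas and JitWithDriver are hypothetical stand-ins, and the explicit driver call here only illustrates the fallback the diff enables implicitly:

#include <cstdint>
#include <string>
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Hypothetical stand-ins for ptxas and the in-driver JIT.
absl::StatusOr<std::vector<uint8_t>> CompileWithPtxas(const std::string&) {
  return absl::UnimplementedError("ptxas too old for this GPU");
}
std::vector<uint8_t> JitWithDriver(const std::string&) { return {}; }

absl::StatusOr<std::vector<uint8_t>> CompileOrFallBack(const std::string& ptx) {
  absl::StatusOr<std::vector<uint8_t>> maybe_cubin = CompileWithPtxas(ptx);
  if (maybe_cubin.ok()) return maybe_cubin;  // ptxas succeeded
  if (maybe_cubin.status().code() == absl::StatusCode::kUnimplemented) {
    // ptxas cannot handle this GPU: let the driver JIT the PTX instead.
    return JitWithDriver(ptx);
  }
  // Any other ptxas failure is a genuine compilation error.
  return maybe_cubin.status();
}
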


@@ -30,6 +30,8 @@ limitations under the License.
namespace xla {
namespace gpu {
void WarnIfBadDriverJITVersion();
// NVPTXCompiler generates efficient GPU executables for NVPTX target.
class NVPTXCompiler : public GpuCompiler {
public:


@@ -562,9 +562,20 @@ StatusOr<std::unique_ptr<Executable>> MlirCompilerImpl::RunBackend(
auto ptx, xla::gpu::nvptx::CompileToPtx(llvmModule.get(),
GetGpuVersion(stream_exec),
config, GetLibdeviceDir(config)));
TF_ASSIGN_OR_RETURN(
auto cubin, se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
gpu::PtxOptsFromConfig(config)));
// Allow falling back to driver compilation when ptxas isn't able to
// compile.
StatusOr<std::vector<uint8>> maybe_cubin =
se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
gpu::PtxOptsFromConfig(config));
std::vector<uint8> cubin;
if (maybe_cubin.ok()) {
cubin = std::move(maybe_cubin).ValueOrDie();
} else if (maybe_cubin.status().code() ==
tensorflow::error::Code::UNIMPLEMENTED) {
xla::gpu::WarnIfBadDriverJITVersion();
} else {
return maybe_cubin.status();
}
auto thunk_schedule = absl::make_unique<ThunkSchedule>(
std::make_unique<gpu::ThunkSequence>(std::move(thunk_sequence)),
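
Note the design choice on the UNIMPLEMENTED path: `cubin` is deliberately left empty rather than the error being propagated, so the resulting executable presumably carries only PTX, which the driver's JIT compiles at load time; that would explain why WarnIfBadDriverJITVersion() is called on exactly this path.
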


@@ -225,6 +225,21 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
int exit_status = ptxas_info_dumper.Communicate(
/*stdin_input=*/nullptr, /*stdout_output=*/nullptr, &stderr_output);
if (exit_status != 0) {
// This happens when the installed ptxas is too old for the current GPU.
// Example error message associated with this error code:
// ptxas fatal : Value 'sm_80' is not defined for option 'gpu-name'
// In that case, fall back to the driver for compilation.
if (absl::StartsWith(stderr_output, "ptxas fatal : Value '") &&
absl::StrContains(stderr_output,
"is not defined for option 'gpu-name'")) {
LOG(WARNING) << "Your CUDA software stack is old. We fallback to the"
<< " NVIDIA driver for some compilation. Update your CUDA"
<< " version to get the best performance."
<< " The ptxas error was: " << stderr_output;
return tensorflow::errors::Unimplemented(
ptxas_path, " ptxas too old. Falling back to the driver to compile.");
}
return port::InternalError(
absl::StrFormat("ptxas exited with non-zero error code %d, output: %s",
exit_status, stderr_output));
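
The detection here is purely textual: it pattern-matches ptxas's stderr. A small sketch of that classifier, assuming the message shape shown in the diff (the exact spacing must match what the installed ptxas actually prints):

#include <string>

#include "absl/strings/match.h"

// Returns true when stderr indicates ptxas predates the requested GPU
// architecture (e.g. "ptxas fatal : Value 'sm_80' is not defined for
// option 'gpu-name'"), i.e. the caller should fall back to the driver.
bool IsPtxasTooOldError(const std::string& stderr_output) {
  return absl::StartsWith(stderr_output, "ptxas fatal : Value '") &&
         absl::StrContains(stderr_output,
                           "is not defined for option 'gpu-name'");
}

Returning Unimplemented here rather than InternalError is what lets the callers above distinguish "ptxas too old" from a genuine compilation failure.
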