Merge pull request #43888 from nouiz:upstream_old_ptxas
PiperOrigin-RevId: 337531954 Change-Id: I096cf095536bc8c819ab61f0020fea20eca341d6
This commit is contained in:
commit
de2c020a2a
@ -198,6 +198,42 @@ absl::optional<bool> CanShareBufferHint(const HloInstruction* user,
|
||||
return absl::nullopt;
|
||||
}
|
||||
|
||||
// Tries to replace the PTX for `module` with the contents of a user-supplied
// file.
//
// Scans the file names listed in the xla_gpu_ptx_file debug option (read from
// module->config().debug_options()) and picks the first one whose basename
// starts with the prefix computed by xla::FilenameFor(*module, "", *ptx).
//
// Returns true after overwriting *ptx with the full contents of the matched
// file; returns false, leaving *ptx unmodified, when no file name matches.
// CHECK-fails if a file name matched but nothing could be read from it (an
// empty file, or one that could not be opened).
|
||||
bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
|
||||
// If the xla_gpu_ptx_file option is set, be explicit when a file is used
|
||||
// and warn when a file is not used, to ease catching typos in the filename.
|
||||
// NOTE(review): the incoming contents of *ptx are passed as the last argument
// of FilenameFor -- presumably a filename suffix that is empty at this point;
// confirm against the caller.
std::string prefix = xla::FilenameFor(*module, "", *ptx);
|
||||
std::string matched_filename;
|
||||
for (const string& full_filename :
|
||||
module->config().debug_options().xla_gpu_ptx_file()) {
|
||||
// To ease comparing many PTX versions, accept different suffixes than
|
||||
// the original filename. Only the basename is compared against the prefix.
|
||||
auto filename = tensorflow::io::Basename(full_filename);
|
||||
if (absl::StartsWith(filename, prefix)) {
|
||||
matched_filename = full_filename;
|
||||
VLOG(0) << "RunBackend() - Will load PTX from file: " << full_filename;
|
||||
// First match wins; the remaining entries are not examined.
break;
|
||||
}
|
||||
}
|
||||
// The option lists at least one file but none matched this module: log it so
// that a mistyped file name does not go unnoticed.
if (module->config().debug_options().xla_gpu_ptx_file().size() > 0 &&
|
||||
matched_filename.empty()) {
|
||||
VLOG(0) << "RunBackend() - For module with prefix '" << prefix
|
||||
<< "', we did not found a PTX file to load.";
|
||||
}
|
||||
|
||||
if (!matched_filename.empty()) {
|
||||
// Read the whole file into *ptx. A stream that failed to open yields an
// empty string here, which the CHECK below turns into a fatal error.
std::ifstream ifs(matched_filename, std::ifstream::in);
|
||||
*ptx = std::string(std::istreambuf_iterator<char>(ifs),
|
||||
std::istreambuf_iterator<char>());
|
||||
CHECK(!ptx->empty()) << "Empty or non existing PTX file: "
|
||||
<< matched_filename;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Prints a warning if the ptx->sass JIT in the driver has known bugs.
|
||||
//
|
||||
// Using such a driver is only a problem if we fail to use ptxas to compile our ptx
|
||||
@ -238,42 +274,6 @@ void WarnIfBadDriverJITVersion() {
|
||||
});
|
||||
}
|
||||
|
||||
// Tries to replace the PTX for `module` with the contents of a user-supplied
// file.
//
// Scans the file names listed in the xla_gpu_ptx_file debug option (read from
// module->config().debug_options()) and picks the first one whose basename
// starts with the prefix computed by xla::FilenameFor(*module, "", *ptx).
//
// Returns true after overwriting *ptx with the full contents of the matched
// file; returns false, leaving *ptx unmodified, when no file name matches.
// CHECK-fails if a file name matched but nothing could be read from it (an
// empty file, or one that could not be opened).
|
||||
bool MaybeLoadPtxFromFile(const HloModule* module, std::string* ptx) {
|
||||
// If the xla_gpu_ptx_file option is set, be explicit when a file is used
|
||||
// and warn when a file is not used, to ease catching typos in the filename.
|
||||
// NOTE(review): the incoming contents of *ptx are passed as the last argument
// of FilenameFor -- presumably a filename suffix that is empty at this point;
// confirm against the caller.
std::string prefix = xla::FilenameFor(*module, "", *ptx);
|
||||
std::string matched_filename;
|
||||
for (const string& full_filename :
|
||||
module->config().debug_options().xla_gpu_ptx_file()) {
|
||||
// To ease comparing many PTX versions, accept different suffixes than
|
||||
// the original filename. Only the basename is compared against the prefix.
|
||||
auto filename = tensorflow::io::Basename(full_filename);
|
||||
if (absl::StartsWith(filename, prefix)) {
|
||||
matched_filename = full_filename;
|
||||
VLOG(0) << "RunBackend() - Will load PTX from file: " << full_filename;
|
||||
// First match wins; the remaining entries are not examined.
break;
|
||||
}
|
||||
}
|
||||
// The option lists at least one file but none matched this module: log it so
// that a mistyped file name does not go unnoticed.
if (module->config().debug_options().xla_gpu_ptx_file().size() > 0 &&
|
||||
matched_filename.empty()) {
|
||||
VLOG(0) << "RunBackend() - For module with prefix '" << prefix
|
||||
<< "', we did not found a PTX file to load.";
|
||||
}
|
||||
|
||||
if (!matched_filename.empty()) {
|
||||
// Read the whole file into *ptx. A stream that failed to open yields an
// empty string here, which the CHECK below turns into a fatal error.
std::ifstream ifs(matched_filename, std::ifstream::in);
|
||||
*ptx = std::string(std::istreambuf_iterator<char>(ifs),
|
||||
std::istreambuf_iterator<char>());
|
||||
CHECK(!ptx->empty()) << "Empty or non existing PTX file: "
|
||||
<< matched_filename;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructs the NVPTX compiler by forwarding the CUDA platform id together
// with the NVPTX target triple and data layout to the GpuCompiler base class.
NVPTXCompiler::NVPTXCompiler()
|
||||
: GpuCompiler(stream_executor::cuda::kCudaPlatformId, nvptx::kTargetTriple,
|
||||
nvptx::kDataLayout) {}
|
||||
@ -415,7 +415,9 @@ std::vector<uint8> NVPTXCompiler::CompileGpuAsmOrGetCachedResult(
|
||||
"using $PATH.",
|
||||
hlo_module_config);
|
||||
}
|
||||
} else {
|
||||
} else if (maybe_cubin.status().code() !=
|
||||
tensorflow::error::Code::UNIMPLEMENTED) {
|
||||
// If unimplemented is returned, we fall back to the driver.
|
||||
LOG(FATAL) << "ptxas returned an error during compilation of ptx "
|
||||
"to sass: '"
|
||||
<< maybe_cubin.status() << "' "
|
||||
|
@ -30,6 +30,8 @@ limitations under the License.
|
||||
namespace xla {
|
||||
namespace gpu {
|
||||
|
||||
void WarnIfBadDriverJITVersion();
|
||||
|
||||
// NVPTXCompiler generates efficient GPU executables for NVPTX target.
|
||||
class NVPTXCompiler : public GpuCompiler {
|
||||
public:
|
||||
|
@ -562,9 +562,20 @@ StatusOr<std::unique_ptr<Executable>> MlirCompilerImpl::RunBackend(
|
||||
auto ptx, xla::gpu::nvptx::CompileToPtx(llvmModule.get(),
|
||||
GetGpuVersion(stream_exec),
|
||||
config, GetLibdeviceDir(config)));
|
||||
TF_ASSIGN_OR_RETURN(
|
||||
auto cubin, se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
|
||||
gpu::PtxOptsFromConfig(config)));
|
||||
// Allow falling back to the driver compilation when ptxas isn't able to
|
||||
// compile.
|
||||
StatusOr<std::vector<uint8>> maybe_cubin =
|
||||
se::CompileGpuAsm(stream_exec->device_ordinal(), ptx.c_str(),
|
||||
gpu::PtxOptsFromConfig(config));
|
||||
std::vector<uint8> cubin;
|
||||
if (maybe_cubin.ok()) {
|
||||
cubin = std::move(maybe_cubin).ValueOrDie();
|
||||
} else if (maybe_cubin.status().code() ==
|
||||
tensorflow::error::Code::UNIMPLEMENTED) {
|
||||
xla::gpu::WarnIfBadDriverJITVersion();
|
||||
} else {
|
||||
return maybe_cubin.status();
|
||||
}
|
||||
|
||||
auto thunk_schedule = absl::make_unique<ThunkSchedule>(
|
||||
std::make_unique<gpu::ThunkSequence>(std::move(thunk_sequence)),
|
||||
|
@ -225,6 +225,21 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
|
||||
int exit_status = ptxas_info_dumper.Communicate(
|
||||
/*stdin_input=*/nullptr, /*stdout_output=*/nullptr, &stderr_output);
|
||||
if (exit_status != 0) {
|
||||
// This happens when the installed ptxas is too old for the current GPU.
|
||||
// Example error message associated with this error code:
|
||||
// ptxas fatal : Value 'sm_80' is not defined for option 'gpu-name'
|
||||
// In that case, fall back to the driver for compilation.
|
||||
if (absl::StartsWith(stderr_output, "ptxas fatal : Value '") &&
|
||||
absl::StrContains(stderr_output,
|
||||
"is not defined for option 'gpu-name'")) {
|
||||
LOG(WARNING) << "Your CUDA software stack is old. We fallback to the"
|
||||
<< " NVIDIA driver for some compilation. Update your CUDA"
|
||||
<< " version to get the best performance."
|
||||
<< " The ptxas error was: " << stderr_output;
|
||||
return tensorflow::errors::Unimplemented(
|
||||
ptxas_path, " ptxas too old. Falling back to the driver to compile.");
|
||||
}
|
||||
|
||||
return port::InternalError(
|
||||
absl::StrFormat("ptxas exited with non-zero error code %d, output: %s",
|
||||
exit_status, stderr_output));
|
||||
|
Loading…
Reference in New Issue
Block a user