[XLA:GPU] Simplify libdevice-search code.
We can hardcode a path to libdevice for CUDA 9+, and that's all we support nowadays.

PiperOrigin-RevId: 254093960
This commit is contained in:
parent
c8f1b3826f
commit
835bcfba8f
@@ -68,47 +68,6 @@ namespace {
|
|||||||
// Default inline threshold value to use in llvm.
|
// Default inline threshold value to use in llvm.
|
||||||
const int kDefaultInlineThreshold = 1100;
|
const int kDefaultInlineThreshold = 1100;
|
||||||
|
|
||||||
// Gets the libdevice filename for a particular compute capability. When
|
|
||||||
// presented with a GPU we don't recognize, we just return the libdevice from
|
|
||||||
// compute_20.
|
|
||||||
static string GetLibdeviceFilename(const string& libdevice_dir_path,
|
|
||||||
std::pair<int, int> compute_capability) {
|
|
||||||
// Since CUDA 9.0, all GPU versions are included in a single file
|
|
||||||
const char* unified_libdevice_filename = "libdevice.10.bc";
|
|
||||||
std::vector<string> unified_libdevice_files;
|
|
||||||
const Status status = tensorflow::Env::Default()->GetMatchingPaths(
|
|
||||||
tensorflow::io::JoinPath(libdevice_dir_path, unified_libdevice_filename),
|
|
||||||
&unified_libdevice_files);
|
|
||||||
if (status.ok() && unified_libdevice_files.size() == 1) {
|
|
||||||
return unified_libdevice_filename;
|
|
||||||
}
|
|
||||||
// There are only four libdevice files: compute_{20,30,35,50}. Each GPU
|
|
||||||
// version gets mapped to one of these. Note in particular that sm_60 and
|
|
||||||
// sm_61 map to libdevice.compute_30.
|
|
||||||
static auto* m = new std::map<std::pair<int, int>, int>({{{2, 0}, 20},
|
|
||||||
{{2, 1}, 20},
|
|
||||||
{{3, 0}, 30},
|
|
||||||
{{3, 2}, 30},
|
|
||||||
{{3, 5}, 35},
|
|
||||||
{{3, 7}, 35},
|
|
||||||
{{5, 0}, 50},
|
|
||||||
{{5, 2}, 50},
|
|
||||||
{{5, 3}, 50},
|
|
||||||
{{6, 0}, 30},
|
|
||||||
{{6, 1}, 30},
|
|
||||||
{{6, 2}, 30}});
|
|
||||||
int libdevice_version = 20;
|
|
||||||
auto it = m->find(compute_capability);
|
|
||||||
if (it != m->end()) {
|
|
||||||
libdevice_version = it->second;
|
|
||||||
} else {
|
|
||||||
LOG(WARNING) << "Unknown compute capability (" << compute_capability.first
|
|
||||||
<< ", " << compute_capability.second << ") ."
|
|
||||||
<< "Defaulting to libdevice for compute_" << libdevice_version;
|
|
||||||
}
|
|
||||||
return absl::StrCat("libdevice.compute_", libdevice_version, ".10.bc");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Gets the GPU name as it's known to LLVM for a given compute capability. If
|
// Gets the GPU name as it's known to LLVM for a given compute capability. If
|
||||||
// we see an unrecognized compute capability, we return "sm_35".
|
// we see an unrecognized compute capability, we return "sm_35".
|
||||||
static string GetSmName(std::pair<int, int> compute_capability) {
|
static string GetSmName(std::pair<int, int> compute_capability) {
|
||||||
@@ -303,14 +262,22 @@ Status LinkLibdeviceIfNecessary(llvm::Module* module,
|
|||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Linker linker(*module);
|
// CUDA 9+ uses a single libdevice file for all devices, and we don't support
|
||||||
string libdevice_path = tensorflow::io::JoinPath(
|
// older CUDAs.
|
||||||
libdevice_dir_path,
|
string libdevice_path =
|
||||||
GetLibdeviceFilename(libdevice_dir_path, compute_capability));
|
tensorflow::io::JoinPath(libdevice_dir_path, "libdevice.10.bc");
|
||||||
TF_RETURN_IF_ERROR(tensorflow::Env::Default()->FileExists(libdevice_path));
|
if (!tensorflow::Env::Default()->FileExists(libdevice_path).ok()) {
|
||||||
|
LOG(WARNING)
|
||||||
|
<< "libdevice is required by this HLO module but was not found at "
|
||||||
|
<< libdevice_path;
|
||||||
|
return xla::InternalError("libdevice not found at %s", libdevice_path);
|
||||||
|
}
|
||||||
|
|
||||||
VLOG(1) << "Linking with libdevice from: " << libdevice_path;
|
VLOG(1) << "Linking with libdevice from: " << libdevice_path;
|
||||||
std::unique_ptr<llvm::Module> libdevice_module =
|
std::unique_ptr<llvm::Module> libdevice_module =
|
||||||
LoadIRModule(libdevice_path, &module->getContext());
|
LoadIRModule(libdevice_path, &module->getContext());
|
||||||
|
|
||||||
|
llvm::Linker linker(*module);
|
||||||
if (linker.linkInModule(
|
if (linker.linkInModule(
|
||||||
std::move(libdevice_module), llvm::Linker::Flags::LinkOnlyNeeded,
|
std::move(libdevice_module), llvm::Linker::Flags::LinkOnlyNeeded,
|
||||||
[](Module& M, const StringSet<>& GVS) {
|
[](Module& M, const StringSet<>& GVS) {
|
||||||
@@ -318,8 +285,8 @@ Status LinkLibdeviceIfNecessary(llvm::Module* module,
|
|||||||
return !GV.hasName() || (GVS.count(GV.getName()) == 0);
|
return !GV.hasName() || (GVS.count(GV.getName()) == 0);
|
||||||
});
|
});
|
||||||
})) {
|
})) {
|
||||||
return tensorflow::errors::Internal(
|
return xla::InternalError("Error linking libdevice from %s",
|
||||||
absl::StrCat("Error linking libdevice from ", libdevice_path));
|
libdevice_path);
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user