From 835bcfba8fb9cad703338b91fe0f9d6b21c61e72 Mon Sep 17 00:00:00 2001
From: Justin Lebar
Date: Wed, 19 Jun 2019 16:23:59 -0700
Subject: [PATCH] [XLA:GPU] Simplify libdevice-search code.

We can hardcode a path to libdevice for CUDA 9+, and that's all we
support nowadays.

PiperOrigin-RevId: 254093960
---
 .../gpu/llvm_gpu_backend/nvptx_backend_lib.cc | 63 +++++--------
 1 file changed, 15 insertions(+), 48 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
index a05f6e48150..9f52f09004b 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
@@ -68,47 +68,6 @@ namespace {
 // Default inline threshold value to use in llvm.
 const int kDefaultInlineThreshold = 1100;
 
-// Gets the libdevice filename for a particular compute capability. When
-// presented with a GPU we don't recognize, we just return the libdevice from
-// compute_20.
-static string GetLibdeviceFilename(const string& libdevice_dir_path,
-                                   std::pair<int, int> compute_capability) {
-  // Since CUDA 9.0, all GPU versions are included in a single file
-  const char* unified_libdevice_filename = "libdevice.10.bc";
-  std::vector<string> unified_libdevice_files;
-  const Status status = tensorflow::Env::Default()->GetMatchingPaths(
-      tensorflow::io::JoinPath(libdevice_dir_path, unified_libdevice_filename),
-      &unified_libdevice_files);
-  if (status.ok() && unified_libdevice_files.size() == 1) {
-    return unified_libdevice_filename;
-  }
-  // There are only four libdevice files: compute_{20,30,35,50}. Each GPU
-  // version gets mapped to one of these. Note in particular that sm_60 and
-  // sm_61 map to libdevice.compute_30.
-  static auto* m = new std::map<std::pair<int, int>, int>({{{2, 0}, 20},
-                                                           {{2, 1}, 20},
-                                                           {{3, 0}, 30},
-                                                           {{3, 2}, 30},
-                                                           {{3, 5}, 35},
-                                                           {{3, 7}, 35},
-                                                           {{5, 0}, 50},
-                                                           {{5, 2}, 50},
-                                                           {{5, 3}, 50},
-                                                           {{6, 0}, 30},
-                                                           {{6, 1}, 30},
-                                                           {{6, 2}, 30}});
-  int libdevice_version = 20;
-  auto it = m->find(compute_capability);
-  if (it != m->end()) {
-    libdevice_version = it->second;
-  } else {
-    LOG(WARNING) << "Unknown compute capability (" << compute_capability.first
-                 << ", " << compute_capability.second << ") ."
-                 << "Defaulting to libdevice for compute_" << libdevice_version;
-  }
-  return absl::StrCat("libdevice.compute_", libdevice_version, ".10.bc");
-}
-
 // Gets the GPU name as it's known to LLVM for a given compute capability. If
 // we see an unrecognized compute capability, we return "sm_35".
 static string GetSmName(std::pair<int, int> compute_capability) {
@@ -303,14 +262,22 @@ Status LinkLibdeviceIfNecessary(llvm::Module* module,
     return Status::OK();
   }
 
-  llvm::Linker linker(*module);
-  string libdevice_path = tensorflow::io::JoinPath(
-      libdevice_dir_path,
-      GetLibdeviceFilename(libdevice_dir_path, compute_capability));
-  TF_RETURN_IF_ERROR(tensorflow::Env::Default()->FileExists(libdevice_path));
+  // CUDA 9+ uses a single libdevice file for all devices, and we don't support
+  // older CUDAs.
+  string libdevice_path =
+      tensorflow::io::JoinPath(libdevice_dir_path, "libdevice.10.bc");
+  if (!tensorflow::Env::Default()->FileExists(libdevice_path).ok()) {
+    LOG(WARNING)
+        << "libdevice is required by this HLO module but was not found at "
+        << libdevice_path;
+    return xla::InternalError("libdevice not found at %s", libdevice_path);
+  }
+
   VLOG(1) << "Linking with libdevice from: " << libdevice_path;
   std::unique_ptr<llvm::Module> libdevice_module =
       LoadIRModule(libdevice_path, &module->getContext());
+
+  llvm::Linker linker(*module);
   if (linker.linkInModule(
           std::move(libdevice_module), llvm::Linker::Flags::LinkOnlyNeeded,
           [](Module& M, const StringSet<>& GVS) {
@@ -318,8 +285,8 @@ Status LinkLibdeviceIfNecessary(llvm::Module* module,
               return !GV.hasName() || (GVS.count(GV.getName()) == 0);
             });
           })) {
-    return tensorflow::errors::Internal(
-        absl::StrCat("Error linking libdevice from ", libdevice_path));
+    return xla::InternalError("Error linking libdevice from %s",
+                              libdevice_path);
   }
   return Status::OK();
 }
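
For context on the lookup this patch hardcodes, below is a minimal standalone
C++ sketch of the simplified behavior, using std::filesystem in place of
tensorflow::Env and tensorflow::io::JoinPath; GetLibdevicePath and
kLibdeviceFilename are illustrative names, not part of the patch. On a stock
CUDA 9+ install the directory passed in is typically the toolkit's
nvvm/libdevice subdirectory.

// Sketch only: mirrors the post-patch logic of joining the libdevice
// directory with the single CUDA 9+ bitcode file and checking existence.
#include <filesystem>
#include <optional>
#include <string>

namespace fs = std::filesystem;

// Since CUDA 9, one bitcode file covers every compute capability, so the
// old per-GPU compute_{20,30,35,50} filename mapping is unnecessary.
constexpr char kLibdeviceFilename[] = "libdevice.10.bc";

// Returns the full path to libdevice, or std::nullopt if the file is absent.
// The patch itself reports a missing file as xla::InternalError instead.
std::optional<std::string> GetLibdevicePath(const std::string& libdevice_dir) {
  fs::path path = fs::path(libdevice_dir) / kLibdeviceFilename;
  if (!fs::exists(path)) return std::nullopt;
  return path.string();
}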