From 835bcfba8fb9cad703338b91fe0f9d6b21c61e72 Mon Sep 17 00:00:00 2001
From: Justin Lebar
Date: Wed, 19 Jun 2019 16:23:59 -0700
Subject: [PATCH] [XLA:GPU] Simplify libdevice-search code.

We can hardcode a path to libdevice for CUDA 9+, and that's all we
support nowadays.

PiperOrigin-RevId: 254093960
---
 .../gpu/llvm_gpu_backend/nvptx_backend_lib.cc | 63 +++++--------
 1 file changed, 15 insertions(+), 48 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
index a05f6e48150..9f52f09004b 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
@@ -68,47 +68,6 @@ namespace {
 // Default inline threshold value to use in llvm.
 const int kDefaultInlineThreshold = 1100;
 
-// Gets the libdevice filename for a particular compute capability. When
-// presented with a GPU we don't recognize, we just return the libdevice from
-// compute_20.
-static string GetLibdeviceFilename(const string& libdevice_dir_path,
-                                   std::pair<int, int> compute_capability) {
-  // Since CUDA 9.0, all GPU versions are included in a single file
-  const char* unified_libdevice_filename = "libdevice.10.bc";
-  std::vector<string> unified_libdevice_files;
-  const Status status = tensorflow::Env::Default()->GetMatchingPaths(
-      tensorflow::io::JoinPath(libdevice_dir_path, unified_libdevice_filename),
-      &unified_libdevice_files);
-  if (status.ok() && unified_libdevice_files.size() == 1) {
-    return unified_libdevice_filename;
-  }
-  // There are only four libdevice files: compute_{20,30,35,50}. Each GPU
-  // version gets mapped to one of these. Note in particular that sm_60 and
-  // sm_61 map to libdevice.compute_30.
-  static auto* m = new std::map<std::pair<int, int>, int>({{{2, 0}, 20},
-                                                           {{2, 1}, 20},
-                                                           {{3, 0}, 30},
-                                                           {{3, 2}, 30},
-                                                           {{3, 5}, 35},
-                                                           {{3, 7}, 35},
-                                                           {{5, 0}, 50},
-                                                           {{5, 2}, 50},
-                                                           {{5, 3}, 50},
-                                                           {{6, 0}, 30},
-                                                           {{6, 1}, 30},
-                                                           {{6, 2}, 30}});
-  int libdevice_version = 20;
-  auto it = m->find(compute_capability);
-  if (it != m->end()) {
-    libdevice_version = it->second;
-  } else {
-    LOG(WARNING) << "Unknown compute capability (" << compute_capability.first
-                 << ", " << compute_capability.second << ") ."
-                 << "Defaulting to libdevice for compute_" << libdevice_version;
-  }
-  return absl::StrCat("libdevice.compute_", libdevice_version, ".10.bc");
-}
-
 // Gets the GPU name as it's known to LLVM for a given compute capability. If
 // we see an unrecognized compute capability, we return "sm_35".
 static string GetSmName(std::pair<int, int> compute_capability) {
@@ -303,14 +262,22 @@ Status LinkLibdeviceIfNecessary(llvm::Module* module,
     return Status::OK();
   }
 
-  llvm::Linker linker(*module);
-  string libdevice_path = tensorflow::io::JoinPath(
-      libdevice_dir_path,
-      GetLibdeviceFilename(libdevice_dir_path, compute_capability));
-  TF_RETURN_IF_ERROR(tensorflow::Env::Default()->FileExists(libdevice_path));
+  // CUDA 9+ uses a single libdevice file for all devices, and we don't support
+  // older CUDAs.
+  string libdevice_path =
+      tensorflow::io::JoinPath(libdevice_dir_path, "libdevice.10.bc");
+  if (!tensorflow::Env::Default()->FileExists(libdevice_path).ok()) {
+    LOG(WARNING)
+        << "libdevice is required by this HLO module but was not found at "
+        << libdevice_path;
+    return xla::InternalError("libdevice not found at %s", libdevice_path);
+  }
+
   VLOG(1) << "Linking with libdevice from: " << libdevice_path;
   std::unique_ptr<llvm::Module> libdevice_module =
       LoadIRModule(libdevice_path, &module->getContext());
+
+  llvm::Linker linker(*module);
   if (linker.linkInModule(
           std::move(libdevice_module), llvm::Linker::Flags::LinkOnlyNeeded,
           [](Module& M, const StringSet<>& GVS) {
@@ -318,8 +285,8 @@ Status LinkLibdeviceIfNecessary(llvm::Module* module,
               return !GV.hasName() || (GVS.count(GV.getName()) == 0);
             });
           })) {
-    return tensorflow::errors::Internal(
-        absl::StrCat("Error linking libdevice from ", libdevice_path));
+    return xla::InternalError("Error linking libdevice from %s",
+                              libdevice_path);
   }
   return Status::OK();
 }
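
For context on the lookup this patch hardcodes, below is a minimal standalone
C++ sketch of the simplified behavior, using std::filesystem in place of
tensorflow::Env and tensorflow::io::JoinPath; GetLibdevicePath and
kLibdeviceFilename are illustrative names, not part of the patch. On a stock
CUDA 9+ install the directory passed in is typically the toolkit's
nvvm/libdevice subdirectory.

// Sketch only: mirrors the post-patch logic of joining the libdevice
// directory with the single CUDA 9+ bitcode file and checking existence.
#include <filesystem>
#include <optional>
#include <string>

namespace fs = std::filesystem;

// Since CUDA 9, one bitcode file covers every compute capability, so the
// old per-GPU compute_{20,30,35,50} filename mapping is unnecessary.
constexpr char kLibdeviceFilename[] = "libdevice.10.bc";

// Returns the full path to libdevice, or std::nullopt if the file is absent.
// The patch itself reports a missing file as xla::InternalError instead.
std::optional<std::string> GetLibdevicePath(const std::string& libdevice_dir) {
  fs::path path = fs::path(libdevice_dir) / kLibdeviceFilename;
  if (!fs::exists(path)) return std::nullopt;
  return path.string();
}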