Improve CUDA runtime dependencies search.

tensorflow::CudaRoot() now may return multiple possible locations of the CUDA root.
PiperOrigin-RevId: 225091635
This commit is contained in:
Artem Belevich 2018-12-11 16:24:24 -08:00 committed by TensorFlower Gardener
parent 33bc0b9788
commit bafb874798
7 changed files with 52 additions and 122 deletions

View File

@@ -108,27 +108,33 @@ namespace {
namespace tracing = tensorflow::tracing;
// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
// HloModule being compiled.
string GetLibdeviceDir(const string& config_cuda_data_dir) {
std::vector<string> potential_libdevice_dirs;
if (!config_cuda_data_dir.empty()) {
potential_libdevice_dirs.push_back(config_cuda_data_dir);
}
potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
// Returns a vector of potential locations of the CUDA root directory.
std::vector<string> GetCudaRootCandidates(
const HloModuleConfig& hlo_module_config) {
std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
// Tries all potential libdevice directories in the order they are inserted.
// Returns the first directory that exists in the file system.
for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
return potential_libdevice_dir;
}
VLOG(2) << "Unable to find potential libdevice dir "
<< potential_libdevice_dir;
// CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
// highest priority.
string xla_gpu_cuda_data_dir =
hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
if (!xla_gpu_cuda_data_dir.empty()) {
potential_cuda_roots.insert(potential_cuda_roots.begin(),
xla_gpu_cuda_data_dir);
}
return potential_cuda_roots;
}
// Returns the directory containing nvvm libdevice files.
string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
string libdevice_dir =
tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
VLOG(2) << "Looking for libdevice at " << libdevice_dir;
if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
VLOG(2) << "Found libdevice dir " << libdevice_dir;
return libdevice_dir;
}
}
LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
// Last resort: maybe in the current folder.
return ".";
@@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
// Compiles the given PTX string using ptxas and returns the resulting machine
// code (i.e. a cubin) as a byte array.
StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
int cc_minor,
bool disable_ptx_optimizations) {
StatusOr<std::vector<uint8>> CompilePtx(
const string& ptx, int cc_major, int cc_minor,
const HloModuleConfig& hlo_module_config) {
tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
const string ptxas_path =
tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
VLOG(2) << "Checking ptxas at " << ptxas_path;
auto env = tensorflow::Env::Default();
string ptxas_path;
for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
VLOG(2) << "Looking for ptxas at " << ptxas_path;
if (env->FileExists(ptxas_path).ok()) {
break;
}
}
TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
VLOG(2) << "Using ptxas at " << ptxas_path;
@@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
if (VLOG_IS_ON(2)) {
ptxas_args.push_back("-v");
}
if (disable_ptx_optimizations) {
if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
ptxas_args.push_back("-O0");
}
ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
@@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
// Find the directory containing libdevice. To avoid searching for it every
// time, we have a one-element cache, keyed on the module's config's
// cuda_data_dir.
const auto& config_cuda_data_dir =
module->config().debug_options().xla_gpu_cuda_data_dir();
if (cached_libdevice_dir_.empty() ||
cached_cuda_data_dir_ != config_cuda_data_dir) {
cached_cuda_data_dir_ = config_cuda_data_dir;
cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
if (cached_libdevice_dir_.empty()) {
cached_libdevice_dir_ = GetLibdeviceDir(module->config());
}
libdevice_dir = cached_libdevice_dir_;
}
@@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
}
}
const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
ptx, cc_major, cc_minor,
module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
const std::vector<uint8> cubin =
CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
auto thunk_schedule = absl::make_unique<ThunkSchedule>(
ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
const string& ptx, int cc_major, int cc_minor,
bool disable_ptx_optimizations) {
const HloModuleConfig& hlo_module_config) {
XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
bool inserted;
@@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
if (inserted) {
CHECK(!cache_value->compilation_done);
if (!ptx.empty()) {
StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
*cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
StatusOr<std::vector<uint8>> maybe_cubin =
CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
if (maybe_cubin.ok()) {
cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
VLOG(2) << "Compiled PTX size:" << ptx.size()

View File

@@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
// compiled cubin. If compilation was unsuccessful, returns an empty vector.
std::vector<uint8> CompilePtxOrGetCachedResult(
const string& ptx, int cc_major, int cc_minor,
bool disable_ptx_optimizations);
const HloModuleConfig& hlo_module_config);
// The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
// -> cubin so we don't recompile the same ptx twice. This is important for

View File

@@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
],
)
tf_cc_test_gpu(
name = "cuda_libdevice_path_test",
size = "small",
srcs = ["platform/cuda_libdevice_path_test.cc"],
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags(),
deps = [
":cuda_libdevice_path",
":lib",
":test",
":test_main",
],
)
tf_cuda_only_cc_test(
name = "util_cuda_kernel_helper_test",
srcs = [
@@ -4931,7 +4917,7 @@ filegroup(
cc_library(
name = "cuda_libdevice_path",
srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
srcs = tf_additional_libdevice_srcs(),
hdrs = ["platform/cuda_libdevice_path.h"],
copts = tf_copts(),
data = tf_additional_libdevice_data(),

View File

@@ -1,26 +0,0 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include "tensorflow/core/lib/io/path.h"
namespace tensorflow {
// Returns the directory that holds the nvvm libdevice bitcode files,
// computed as <CudaRoot()>/nvvm/libdevice. (This helper is removed by this
// commit in favor of searching multiple candidate CUDA roots.)
string LibdeviceRoot() {
return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
}
} // namespace tensorflow

View File

@@ -16,16 +16,14 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
#define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
#include <vector>
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
// Returns the root directory of the CUDA SDK, which contains sub-folders such
// as bin, lib64, and nvvm.
string CudaRoot();
// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
string LibdeviceRoot();
// Returns, in order of preference, potential locations of the root directory of
// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
std::vector<string> CandidateCudaRoots();
} // namespace tensorflow

View File

@@ -1,35 +0,0 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
#if GOOGLE_CUDA
// Verifies that LibdeviceRoot() names a directory that actually contains
// libdevice bitcode files (libdevice.*.bc). (This test is deleted by this
// commit along with LibdeviceRoot() itself.)
TEST(CudaLibdevicePathTest, LibdevicePath) {
VLOG(2) << "Libdevice root = " << LibdeviceRoot();
// Glob for libdevice bitcode files under the reported root.
std::vector<string> libdevice_files;
TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
// At least one libdevice.*.bc file must be present for the path to be valid.
EXPECT_LT(0, libdevice_files.size());
}
#endif
} // namespace tensorflow

View File

@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include <stdlib.h>
#include <vector>
#if !defined(PLATFORM_GOOGLE)
#include "cuda/cuda_config.h"
@@ -24,9 +25,9 @@ limitations under the License.
namespace tensorflow {
string CudaRoot() {
std::vector<string> CandidateCudaRoots() {
VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
return TF_CUDA_TOOLKIT_PATH;
return {TF_CUDA_TOOLKIT_PATH};
}
} // namespace tensorflow