From bafb8747983fbcf186ffb063ed39dbb0a18e3c8e Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Tue, 11 Dec 2018 16:24:24 -0800
Subject: [PATCH] Improve CUDA runtime dependencies search.

tensorflow::CudaRoot() is replaced by tensorflow::CandidateCudaRoots(),
which may return multiple possible locations of the CUDA root.

PiperOrigin-RevId: 225091635
---
 .../xla/service/gpu/nvptx_compiler.cc         | 80 ++++++++++---------
 .../compiler/xla/service/gpu/nvptx_compiler.h |  2 +-
 tensorflow/core/BUILD                         | 16 +---
 .../core/platform/cuda_libdevice_path.cc      | 26 ------
 .../core/platform/cuda_libdevice_path.h       | 10 +--
 .../core/platform/cuda_libdevice_path_test.cc | 35 --------
 .../platform/default/cuda_libdevice_path.cc   |  5 +-
 7 files changed, 52 insertions(+), 122 deletions(-)
 delete mode 100644 tensorflow/core/platform/cuda_libdevice_path.cc
 delete mode 100644 tensorflow/core/platform/cuda_libdevice_path_test.cc
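
As a quick orientation before the diffs: the patch turns the CUDA root from a
single path into a prioritized candidate list. The sketch below is
illustrative only; BuildCandidateRoots() and the sample default path are
hypothetical stand-ins (in the patch itself the defaults come from
tensorflow::CandidateCudaRoots(), and the user override comes from the
--xla_gpu_cuda_data_dir flag):

    #include <string>
    #include <vector>

    // A user-specified CUDA directory, when present, is searched before any
    // build-time default. Callers then probe each candidate root in order
    // and use the first one that exists on disk.
    std::vector<std::string> BuildCandidateRoots(const std::string& user_dir) {
      std::vector<std::string> roots = {"/usr/local/cuda"};  // default root(s)
      if (!user_dir.empty()) {
        roots.insert(roots.begin(), user_dir);  // highest priority
      }
      return roots;
    }
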
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index f3e17d88824..60f2116e608 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -108,27 +108,33 @@ namespace {
 
 namespace tracing = tensorflow::tracing;
 
-// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
-// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
-// HloModule being compiled.
-string GetLibdeviceDir(const string& config_cuda_data_dir) {
-  std::vector<string> potential_libdevice_dirs;
-  if (!config_cuda_data_dir.empty()) {
-    potential_libdevice_dirs.push_back(config_cuda_data_dir);
-  }
-  potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
+// Returns a vector of potential locations of the CUDA root directory.
+std::vector<string> GetCudaRootCandidates(
+    const HloModuleConfig& hlo_module_config) {
+  std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
 
-  // Tries all potential libdevice directories in the order they are inserted.
-  // Returns the first directory that exists in the file system.
-  for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
-    if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
-      VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
-      return potential_libdevice_dir;
+  // CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
+  // highest priority.
+  string xla_gpu_cuda_data_dir =
+      hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
+  if (!xla_gpu_cuda_data_dir.empty()) {
+    potential_cuda_roots.insert(potential_cuda_roots.begin(),
+                                xla_gpu_cuda_data_dir);
+  }
+  return potential_cuda_roots;
+}
+
+// Returns the directory containing nvvm libdevice files.
+string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    string libdevice_dir =
+        tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
+    VLOG(2) << "Looking for libdevice at " << libdevice_dir;
+    if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
+      VLOG(2) << "Found libdevice dir " << libdevice_dir;
+      return libdevice_dir;
     }
-    VLOG(2) << "Unable to find potential libdevice dir "
-            << potential_libdevice_dir;
   }
-
   LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
   // Last resort: maybe in the current folder.
   return ".";
@@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
 
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
-StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
-                                        int cc_minor,
-                                        bool disable_ptx_optimizations) {
+StatusOr<std::vector<uint8>> CompilePtx(
+    const string& ptx, int cc_major, int cc_minor,
+    const HloModuleConfig& hlo_module_config) {
   tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
-  const string ptxas_path =
-      tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
-  VLOG(2) << "Checking ptxas at " << ptxas_path;
   auto env = tensorflow::Env::Default();
+  string ptxas_path;
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
+    VLOG(2) << "Looking for ptxas at " << ptxas_path;
+    if (env->FileExists(ptxas_path).ok()) {
+      break;
+    }
+  }
   TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
   VLOG(2) << "Using ptxas at " << ptxas_path;
 
@@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back("-v");
   }
-  if (disable_ptx_optimizations) {
+  if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
     ptxas_args.push_back("-O0");
   }
   ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
@@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     // Find the directory containing libdevice.  To avoid searching for it every
     // time, we have a one-element cache, keyed on the module's config's
    // cuda_data_dir.
-    const auto& config_cuda_data_dir =
-        module->config().debug_options().xla_gpu_cuda_data_dir();
-    if (cached_libdevice_dir_.empty() ||
-        cached_cuda_data_dir_ != config_cuda_data_dir) {
-      cached_cuda_data_dir_ = config_cuda_data_dir;
-      cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
+    if (cached_libdevice_dir_.empty()) {
+      cached_libdevice_dir_ = GetLibdeviceDir(module->config());
     }
     libdevice_dir = cached_libdevice_dir_;
   }
@@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     }
   }
 
-  const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
-      ptx, cc_major, cc_minor,
-      module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
+  const std::vector<uint8> cubin =
+      CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
 
   auto thunk_schedule = absl::make_unique<ThunkSchedule>(
       ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
 
 std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
     const string& ptx, int cc_major, int cc_minor,
-    bool disable_ptx_optimizations) {
+    const HloModuleConfig& hlo_module_config) {
   XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
   tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
   bool inserted;
@@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
   if (inserted) {
     CHECK(!cache_value->compilation_done);
     if (!ptx.empty()) {
-      StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
-          *cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
+      StatusOr<std::vector<uint8>> maybe_cubin =
+          CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
       if (maybe_cubin.ok()) {
         cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
         VLOG(2) << "Compiled PTX size:" << ptx.size()
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index be5e31a5011..b2077f42fd0 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
   // compiled cubin.  If compilation was unsuccessful, returns an empty vector.
   std::vector<uint8> CompilePtxOrGetCachedResult(
       const string& ptx, int cc_major, int cc_minor,
-      bool disable_ptx_optimizations);
+      const HloModuleConfig& hlo_module_config);
 
   // The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
   // -> cubin so we don't recompile the same ptx twice.  This is important for
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 5f5ca63540f..d92f0ba6552 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
     ],
 )
 
-tf_cc_test_gpu(
-    name = "cuda_libdevice_path_test",
-    size = "small",
-    srcs = ["platform/cuda_libdevice_path_test.cc"],
-    linkstatic = tf_kernel_tests_linkstatic(),
-    tags = tf_cuda_tests_tags(),
-    deps = [
-        ":cuda_libdevice_path",
-        ":lib",
-        ":test",
-        ":test_main",
-    ],
-)
-
 tf_cuda_only_cc_test(
     name = "util_cuda_kernel_helper_test",
     srcs = [
@@ -4931,7 +4917,7 @@ filegroup(
 
 cc_library(
     name = "cuda_libdevice_path",
-    srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
+    srcs = tf_additional_libdevice_srcs(),
     hdrs = ["platform/cuda_libdevice_path.h"],
     copts = tf_copts(),
     data = tf_additional_libdevice_data(),
diff --git a/tensorflow/core/platform/cuda_libdevice_path.cc b/tensorflow/core/platform/cuda_libdevice_path.cc
deleted file mode 100644
index 4d6532b983d..00000000000
--- a/tensorflow/core/platform/cuda_libdevice_path.cc
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/io/path.h"
-
-namespace tensorflow {
-
-string LibdeviceRoot() {
-  return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
-}
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/platform/cuda_libdevice_path.h b/tensorflow/core/platform/cuda_libdevice_path.h
index 6ef565ecd3c..f2dbff9043a 100644
--- a/tensorflow/core/platform/cuda_libdevice_path.h
+++ b/tensorflow/core/platform/cuda_libdevice_path.h
@@ -16,16 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 #define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 
+#include <vector>
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-// Returns the root directory of the CUDA SDK, which contains sub-folders such
-// as bin, lib64, and nvvm.
-string CudaRoot();
-
-// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
-string LibdeviceRoot();
+// Returns, in order of preference, potential locations of the root directory of
+// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
+std::vector<string> CandidateCudaRoots();
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/platform/cuda_libdevice_path_test.cc b/tensorflow/core/platform/cuda_libdevice_path_test.cc
deleted file mode 100644
index 2d34239a995..00000000000
--- a/tensorflow/core/platform/cuda_libdevice_path_test.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-
-#if GOOGLE_CUDA
-TEST(CudaLibdevicePathTest, LibdevicePath) {
-  VLOG(2) << "Libdevice root = " << LibdeviceRoot();
-  std::vector<string> libdevice_files;
-  TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
-      io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
-  EXPECT_LT(0, libdevice_files.size());
-}
-#endif
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/platform/default/cuda_libdevice_path.cc b/tensorflow/core/platform/default/cuda_libdevice_path.cc
index 20ee3ad621a..a8b2e7202ac 100644
--- a/tensorflow/core/platform/default/cuda_libdevice_path.cc
+++ b/tensorflow/core/platform/default/cuda_libdevice_path.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/platform/cuda_libdevice_path.h"
 
 #include <stdlib.h>
+#include <vector>
 
 #if !defined(PLATFORM_GOOGLE)
 #include "cuda/cuda_config.h"
@@ -24,9 +25,9 @@ limitations under the License.
 
 namespace tensorflow {
 
-string CudaRoot() {
+std::vector<string> CandidateCudaRoots() {
   VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
-  return TF_CUDA_TOOLKIT_PATH;
+  return {TF_CUDA_TOOLKIT_PATH};
 }
 
 }  // namespace tensorflow
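
A platform port may publish more than one root. As a sketch, a hypothetical
multi-root variant of CandidateCudaRoots() could look like the following
(only the TF_CUDA_TOOLKIT_PATH entry reflects this patch; the extra system
paths are illustrative assumptions):

    std::vector<string> CandidateCudaRoots() {
      // In order of preference: the configured toolkit path first, then
      // common system-wide install locations.
      return {TF_CUDA_TOOLKIT_PATH, "/usr/local/cuda", "/opt/cuda"};
    }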