Improve CUDA runtime dependencies search.

tensorflow::CudaRoot(), which returned a single directory, is replaced by tensorflow::CandidateCudaRoots(), which returns multiple possible locations of the CUDA root in order of preference.
PiperOrigin-RevId: 225091635
Artem Belevich authored on 2018-12-11 16:24:24 -08:00; committed by TensorFlower Gardener
parent 33bc0b9788
commit bafb874798
7 changed files with 52 additions and 122 deletions
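
The pattern the new code uses throughout is: take the candidates from tensorflow::CandidateCudaRoots(), prepend the user-specified --xla_gpu_cuda_data_dir if any, and probe each candidate until the needed file or directory is found. A minimal sketch of the probing step, using the APIs this commit touches (FindCudaSubdir itself is a hypothetical helper written for illustration, not part of the commit):

    #include <string>
    #include <vector>

    #include "tensorflow/core/lib/io/path.h"
    #include "tensorflow/core/platform/cuda_libdevice_path.h"
    #include "tensorflow/core/platform/env.h"

    // Hypothetical helper: returns the first candidate CUDA root that contains
    // `subdir` (e.g. "nvvm/libdevice"), falling back to "." the same way
    // GetLibdeviceDir() in nvptx_compiler.cc below does.
    std::string FindCudaSubdir(const std::string& subdir) {
      for (const std::string& root : tensorflow::CandidateCudaRoots()) {
        const std::string dir = tensorflow::io::JoinPath(root, subdir);
        if (tensorflow::Env::Default()->IsDirectory(dir).ok()) return dir;
      }
      return ".";  // Last resort: the current folder.
    }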

tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc

@@ -108,27 +108,33 @@ namespace {
 namespace tracing = tensorflow::tracing;
 
-// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
-// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
-// HloModule being compiled.
-string GetLibdeviceDir(const string& config_cuda_data_dir) {
-  std::vector<string> potential_libdevice_dirs;
-  if (!config_cuda_data_dir.empty()) {
-    potential_libdevice_dirs.push_back(config_cuda_data_dir);
-  }
-  potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
-
-  // Tries all potential libdevice directories in the order they are inserted.
-  // Returns the first directory that exists in the file system.
-  for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
-    if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
-      VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
-      return potential_libdevice_dir;
+// Returns a vector of potential locations of the CUDA root directory.
+std::vector<string> GetCudaRootCandidates(
+    const HloModuleConfig& hlo_module_config) {
+  std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
+
+  // CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
+  // highest priority.
+  string xla_gpu_cuda_data_dir =
+      hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
+  if (!xla_gpu_cuda_data_dir.empty()) {
+    potential_cuda_roots.insert(potential_cuda_roots.begin(),
+                                xla_gpu_cuda_data_dir);
+  }
+  return potential_cuda_roots;
+}
+
+// Returns the directory containing nvvm libdevice files.
+string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    string libdevice_dir =
+        tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
+    VLOG(2) << "Looking for libdevice at " << libdevice_dir;
+    if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
+      VLOG(2) << "Found libdevice dir " << libdevice_dir;
+      return libdevice_dir;
     }
-    VLOG(2) << "Unable to find potential libdevice dir "
-            << potential_libdevice_dir;
   }
+
   LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
   // Last resort: maybe in the current folder.
   return ".";
@@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
 
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
-StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
-                                        int cc_minor,
-                                        bool disable_ptx_optimizations) {
+StatusOr<std::vector<uint8>> CompilePtx(
+    const string& ptx, int cc_major, int cc_minor,
+    const HloModuleConfig& hlo_module_config) {
   tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
-  const string ptxas_path =
-      tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
-  VLOG(2) << "Checking ptxas at " << ptxas_path;
   auto env = tensorflow::Env::Default();
+  string ptxas_path;
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
+    VLOG(2) << "Looking for ptxas at " << ptxas_path;
+    if (env->FileExists(ptxas_path).ok()) {
+      break;
+    }
+  }
   TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
 
   VLOG(2) << "Using ptxas at " << ptxas_path;
@@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back("-v");
   }
-  if (disable_ptx_optimizations) {
+  if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
     ptxas_args.push_back("-O0");
   }
   ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
@@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     // Find the directory containing libdevice. To avoid searching for it every
     // time, we have a one-element cache, keyed on the module's config's
     // cuda_data_dir.
-    const auto& config_cuda_data_dir =
-        module->config().debug_options().xla_gpu_cuda_data_dir();
-    if (cached_libdevice_dir_.empty() ||
-        cached_cuda_data_dir_ != config_cuda_data_dir) {
-      cached_cuda_data_dir_ = config_cuda_data_dir;
-      cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
+    if (cached_libdevice_dir_.empty()) {
+      cached_libdevice_dir_ = GetLibdeviceDir(module->config());
     }
     libdevice_dir = cached_libdevice_dir_;
   }
@@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     }
   }
 
-  const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
-      ptx, cc_major, cc_minor,
-      module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
+  const std::vector<uint8> cubin =
+      CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
 
   auto thunk_schedule = absl::make_unique<ThunkSchedule>(
       ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
 
 std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
     const string& ptx, int cc_major, int cc_minor,
-    bool disable_ptx_optimizations) {
+    const HloModuleConfig& hlo_module_config) {
   XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
   tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
   bool inserted;
@@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
   if (inserted) {
     CHECK(!cache_value->compilation_done);
     if (!ptx.empty()) {
-      StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
-          *cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
+      StatusOr<std::vector<uint8>> maybe_cubin =
+          CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
       if (maybe_cubin.ok()) {
         cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
         VLOG(2) << "Compiled PTX size:" << ptx.size()

tensorflow/compiler/xla/service/gpu/nvptx_compiler.h

@@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
   // compiled cubin. If compilation was unsuccessful, returns an empty vector.
   std::vector<uint8> CompilePtxOrGetCachedResult(
       const string& ptx, int cc_major, int cc_minor,
-      bool disable_ptx_optimizations);
+      const HloModuleConfig& hlo_module_config);
 
   // The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
   // -> cubin so we don't recompile the same ptx twice. This is important for

tensorflow/core/BUILD

@@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
     ],
 )
 
-tf_cc_test_gpu(
-    name = "cuda_libdevice_path_test",
-    size = "small",
-    srcs = ["platform/cuda_libdevice_path_test.cc"],
-    linkstatic = tf_kernel_tests_linkstatic(),
-    tags = tf_cuda_tests_tags(),
-    deps = [
-        ":cuda_libdevice_path",
-        ":lib",
-        ":test",
-        ":test_main",
-    ],
-)
-
 tf_cuda_only_cc_test(
     name = "util_cuda_kernel_helper_test",
     srcs = [
@@ -4931,7 +4917,7 @@ filegroup(
 
 cc_library(
     name = "cuda_libdevice_path",
-    srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
+    srcs = tf_additional_libdevice_srcs(),
    hdrs = ["platform/cuda_libdevice_path.h"],
     copts = tf_copts(),
     data = tf_additional_libdevice_data(),

tensorflow/core/platform/cuda_libdevice_path.cc

@@ -1,26 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/io/path.h"
-
-namespace tensorflow {
-
-string LibdeviceRoot() {
-  return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
-}
-
-}  // namespace tensorflow

tensorflow/core/platform/cuda_libdevice_path.h

@@ -16,16 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 #define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 
+#include <vector>
+
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-// Returns the root directory of the CUDA SDK, which contains sub-folders such
-// as bin, lib64, and nvvm.
-string CudaRoot();
-
-// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
-string LibdeviceRoot();
+// Returns, in order of preference, potential locations of the root directory of
+// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
+std::vector<string> CandidateCudaRoots();
 
 }  // namespace tensorflow

tensorflow/core/platform/cuda_libdevice_path_test.cc

@@ -1,35 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-
-#if GOOGLE_CUDA
-TEST(CudaLibdevicePathTest, LibdevicePath) {
-  VLOG(2) << "Libdevice root = " << LibdeviceRoot();
-  std::vector<string> libdevice_files;
-  TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
-      io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
-  EXPECT_LT(0, libdevice_files.size());
-}
-#endif
-
-}  // namespace tensorflow

tensorflow/core/platform/default/cuda_libdevice_path.cc

@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/platform/cuda_libdevice_path.h"
 
 #include <stdlib.h>
+#include <vector>
 
 #if !defined(PLATFORM_GOOGLE)
 #include "cuda/cuda_config.h"
@@ -24,9 +25,9 @@ limitations under the License.
 
 namespace tensorflow {
 
-string CudaRoot() {
+std::vector<string> CandidateCudaRoots() {
   VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
-  return TF_CUDA_TOOLKIT_PATH;
+  return {TF_CUDA_TOOLKIT_PATH};
 }
 
 }  // namespace tensorflow
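
Note that this default implementation still returns a single candidate, the toolkit path baked in at configure time via cuda/cuda_config.h; the std::vector return type is what lets platform implementations (or future changes) supply fallbacks. A hypothetical variant, purely for illustration and not part of this commit:

    #include <vector>

    #include "cuda/cuda_config.h"  // Defines TF_CUDA_TOOLKIT_PATH.
    #include "tensorflow/core/platform/cuda_libdevice_path.h"
    #include "tensorflow/core/platform/types.h"

    namespace tensorflow {

    // Hypothetical: try the configured toolkit path first, then common
    // system-wide install locations, in order of preference.
    std::vector<string> CandidateCudaRoots() {
      return {TF_CUDA_TOOLKIT_PATH, "/usr/local/cuda", "/opt/cuda"};
    }

    }  // namespace tensorflow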