Improve CUDA runtime dependencies search.

tensorflow::CudaRoot() now may return multiple possible locations of the CUDA root.
PiperOrigin-RevId: 225091635
This commit is contained in:
Artem Belevich 2018-12-11 16:24:24 -08:00 committed by TensorFlower Gardener
parent 33bc0b9788
commit bafb874798
7 changed files with 52 additions and 122 deletions

View File

@@ -108,27 +108,33 @@ namespace {
namespace tracing = tensorflow::tracing;
// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
// HloModule being compiled.
string GetLibdeviceDir(const string& config_cuda_data_dir) {
std::vector<string> potential_libdevice_dirs;
if (!config_cuda_data_dir.empty()) {
potential_libdevice_dirs.push_back(config_cuda_data_dir);
}
potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
// Returns a vector of potential locations of the CUDA root directory.
std::vector<string> GetCudaRootCandidates(
const HloModuleConfig& hlo_module_config) {
std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
// Tries all potential libdevice directories in the order they are inserted.
// Returns the first directory that exists in the file system.
for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
return potential_libdevice_dir;
}
VLOG(2) << "Unable to find potential libdevice dir "
<< potential_libdevice_dir;
// CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
// highest priority.
string xla_gpu_cuda_data_dir =
hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
if (!xla_gpu_cuda_data_dir.empty()) {
potential_cuda_roots.insert(potential_cuda_roots.begin(),
xla_gpu_cuda_data_dir);
}
return potential_cuda_roots;
}
// Returns the directory containing nvvm libdevice files.
string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
string libdevice_dir =
tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
VLOG(2) << "Looking for libdevice at " << libdevice_dir;
if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
VLOG(2) << "Found libdevice dir " << libdevice_dir;
return libdevice_dir;
}
}
LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
// Last resort: maybe in the current folder.
return ".";
@@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
// Compiles the given PTX string using ptxas and returns the resulting machine
// code (i.e. a cubin) as a byte array.
StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
int cc_minor,
bool disable_ptx_optimizations) {
StatusOr<std::vector<uint8>> CompilePtx(
const string& ptx, int cc_major, int cc_minor,
const HloModuleConfig& hlo_module_config) {
tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
const string ptxas_path =
tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
VLOG(2) << "Checking ptxas at " << ptxas_path;
auto env = tensorflow::Env::Default();
string ptxas_path;
for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
VLOG(2) << "Looking for ptxas at " << ptxas_path;
if (env->FileExists(ptxas_path).ok()) {
break;
}
}
TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
VLOG(2) << "Using ptxas at " << ptxas_path;
@@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
if (VLOG_IS_ON(2)) {
ptxas_args.push_back("-v");
}
if (disable_ptx_optimizations) {
if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
ptxas_args.push_back("-O0");
}
ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
@@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
// Find the directory containing libdevice. To avoid searching for it every
// time, we have a one-element cache, keyed on the module's config's
// cuda_data_dir.
const auto& config_cuda_data_dir =
module->config().debug_options().xla_gpu_cuda_data_dir();
if (cached_libdevice_dir_.empty() ||
cached_cuda_data_dir_ != config_cuda_data_dir) {
cached_cuda_data_dir_ = config_cuda_data_dir;
cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
if (cached_libdevice_dir_.empty()) {
cached_libdevice_dir_ = GetLibdeviceDir(module->config());
}
libdevice_dir = cached_libdevice_dir_;
}
@@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
}
}
const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
ptx, cc_major, cc_minor,
module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
const std::vector<uint8> cubin =
CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
auto thunk_schedule = absl::make_unique<ThunkSchedule>(
ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
const string& ptx, int cc_major, int cc_minor,
bool disable_ptx_optimizations) {
const HloModuleConfig& hlo_module_config) {
XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
bool inserted;
@@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
if (inserted) {
CHECK(!cache_value->compilation_done);
if (!ptx.empty()) {
StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
*cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
StatusOr<std::vector<uint8>> maybe_cubin =
CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
if (maybe_cubin.ok()) {
cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
VLOG(2) << "Compiled PTX size:" << ptx.size()

View File

@@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
// compiled cubin. If compilation was unsuccessful, returns an empty vector.
std::vector<uint8> CompilePtxOrGetCachedResult(
const string& ptx, int cc_major, int cc_minor,
bool disable_ptx_optimizations);
const HloModuleConfig& hlo_module_config);
// The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
// -> cubin so we don't recompile the same ptx twice. This is important for

View File

@@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
],
)
tf_cc_test_gpu(
name = "cuda_libdevice_path_test",
size = "small",
srcs = ["platform/cuda_libdevice_path_test.cc"],
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags(),
deps = [
":cuda_libdevice_path",
":lib",
":test",
":test_main",
],
)
tf_cuda_only_cc_test(
name = "util_cuda_kernel_helper_test",
srcs = [
@@ -4931,7 +4917,7 @@ filegroup(
cc_library(
name = "cuda_libdevice_path",
srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
srcs = tf_additional_libdevice_srcs(),
hdrs = ["platform/cuda_libdevice_path.h"],
copts = tf_copts(),
data = tf_additional_libdevice_data(),

View File

@@ -1,26 +0,0 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include "tensorflow/core/lib/io/path.h"
namespace tensorflow {
// Returns the directory that holds the nvvm libdevice bitcode files,
// computed as <CudaRoot()>/nvvm/libdevice. (This helper is removed by this
// commit in favor of searching multiple candidate CUDA roots.)
string LibdeviceRoot() {
return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
}
} // namespace tensorflow

View File

@@ -16,16 +16,14 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
#define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
#include <vector>
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
// Returns the root directory of the CUDA SDK, which contains sub-folders such
// as bin, lib64, and nvvm.
string CudaRoot();
// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
string LibdeviceRoot();
// Returns, in order of preference, potential locations of the root directory of
// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
std::vector<string> CandidateCudaRoots();
} // namespace tensorflow

View File

@@ -1,35 +0,0 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
#if GOOGLE_CUDA
// Verifies that LibdeviceRoot() names a directory that actually contains
// libdevice bitcode files (libdevice.*.bc). (This test is deleted by this
// commit along with LibdeviceRoot() itself.)
TEST(CudaLibdevicePathTest, LibdevicePath) {
VLOG(2) << "Libdevice root = " << LibdeviceRoot();
// Glob for libdevice bitcode files under the reported root.
std::vector<string> libdevice_files;
TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
// At least one libdevice.*.bc file must be present for the path to be valid.
EXPECT_LT(0, libdevice_files.size());
}
#endif
} // namespace tensorflow

View File

@@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include <stdlib.h>
#include <vector>
#if !defined(PLATFORM_GOOGLE)
#include "cuda/cuda_config.h"
@@ -24,9 +25,9 @@ limitations under the License.
namespace tensorflow {
string CudaRoot() {
std::vector<string> CandidateCudaRoots() {
VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
return TF_CUDA_TOOLKIT_PATH;
return {TF_CUDA_TOOLKIT_PATH};
}
} // namespace tensorflow