Improve CUDA runtime dependencies search.

tensorflow::CudaRoot() is replaced by tensorflow::CandidateCudaRoots(), which may return multiple possible locations of the CUDA root.

PiperOrigin-RevId: 225091635
commit bafb874798
parent 33bc0b9788
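At its core, the change replaces a single CUDA root with an ordered list of candidate roots that each consumer probes until it finds a hit. A minimal standalone sketch of that probe pattern, with hypothetical helper names (the real code in the diff below uses tensorflow::Env and tensorflow::io::JoinPath instead):

#include <sys/stat.h>

#include <string>
#include <vector>

// Hypothetical stand-in for tensorflow::Env::Default()->IsDirectory(path).ok().
static bool DirectoryExists(const std::string& path) {
  struct stat st;
  return stat(path.c_str(), &st) == 0 && S_ISDIR(st.st_mode);
}

// Probes "<root>/<subdir>" for each candidate root in priority order and
// returns the first directory that exists; "." is the last-resort fallback,
// mirroring GetLibdeviceDir() in the diff below.
static std::string FindFirstExistingDir(const std::vector<std::string>& roots,
                                        const std::string& subdir) {
  for (const std::string& root : roots) {
    const std::string candidate = root + "/" + subdir;
    if (DirectoryExists(candidate)) return candidate;
  }
  return ".";
}

Probing in priority order keeps the common case (one well-known root) cheap while letting callers layer user-specified overrides on top.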
tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc

@@ -108,27 +108,33 @@ namespace {
 namespace tracing = tensorflow::tracing;
 
-// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
-// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
-// HloModule being compiled.
-string GetLibdeviceDir(const string& config_cuda_data_dir) {
-  std::vector<string> potential_libdevice_dirs;
-  if (!config_cuda_data_dir.empty()) {
-    potential_libdevice_dirs.push_back(config_cuda_data_dir);
-  }
-  potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
+// Returns a vector of potential locations of the CUDA root directory.
+std::vector<string> GetCudaRootCandidates(
+    const HloModuleConfig& hlo_module_config) {
+  std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
 
-  // Tries all potential libdevice directories in the order they are inserted.
-  // Returns the first directory that exists in the file system.
-  for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
-    if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
-      VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
-      return potential_libdevice_dir;
-    }
-    VLOG(2) << "Unable to find potential libdevice dir "
-            << potential_libdevice_dir;
+  // CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
+  // highest priority.
+  string xla_gpu_cuda_data_dir =
+      hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
+  if (!xla_gpu_cuda_data_dir.empty()) {
+    potential_cuda_roots.insert(potential_cuda_roots.begin(),
+                                xla_gpu_cuda_data_dir);
+  }
+  return potential_cuda_roots;
+}
+
+// Returns the directory containing nvvm libdevice files.
+string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    string libdevice_dir =
+        tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
+    VLOG(2) << "Looking for libdevice at " << libdevice_dir;
+    if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
+      VLOG(2) << "Found libdevice dir " << libdevice_dir;
+      return libdevice_dir;
+    }
   }
 
   LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
   // Last resort: maybe in the current folder.
   return ".";
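The ordering contract above is worth spelling out: a user-supplied --xla_gpu_cuda_data_dir is prepended, so it always wins over the build-time candidates. A hypothetical illustration with made-up paths:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Suppose CandidateCudaRoots() yielded the build-time default...
  std::vector<std::string> roots = {"/usr/local/cuda"};
  // ...and the user passed --xla_gpu_cuda_data_dir=/opt/cuda-10.0.
  // GetCudaRootCandidates() prepends the flag value:
  roots.insert(roots.begin(), "/opt/cuda-10.0");
  for (const std::string& root : roots) std::cout << root << "\n";
  // Prints /opt/cuda-10.0 first: the user's directory is probed before
  // any build-time location.
}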
@@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
 
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
-StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
-                                        int cc_minor,
-                                        bool disable_ptx_optimizations) {
+StatusOr<std::vector<uint8>> CompilePtx(
+    const string& ptx, int cc_major, int cc_minor,
+    const HloModuleConfig& hlo_module_config) {
   tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
-  const string ptxas_path =
-      tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
-  VLOG(2) << "Checking ptxas at " << ptxas_path;
   auto env = tensorflow::Env::Default();
+  string ptxas_path;
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
+    VLOG(2) << "Looking for ptxas at " << ptxas_path;
+    if (env->FileExists(ptxas_path).ok()) {
+      break;
+    }
+  }
   TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
   VLOG(2) << "Using ptxas at " << ptxas_path;
 
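One subtlety in the new ptxas lookup: if no candidate contains bin/ptxas, the loop leaves ptxas_path pointing at the last candidate probed, and the following TF_RETURN_IF_ERROR(env->FileExists(ptxas_path)) turns that into the returned error. The same search-then-verify shape in isolation, as a hypothetical standalone helper:

#include <fstream>
#include <string>
#include <vector>

// Hypothetical stand-in for env->FileExists(path).ok().
static bool FileExists(const std::string& path) {
  return std::ifstream(path).good();
}

// Returns the first "<root>/bin/ptxas" that exists. If none exists, the last
// probed path is returned so the caller's existence check can report a
// concrete file name in its error message.
static std::string FindPtxas(const std::vector<std::string>& cuda_roots) {
  std::string ptxas_path;
  for (const std::string& root : cuda_roots) {
    ptxas_path = root + "/bin/ptxas";
    if (FileExists(ptxas_path)) break;  // First hit wins.
  }
  return ptxas_path;  // Caller must re-check existence.
}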
@@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back("-v");
   }
-  if (disable_ptx_optimizations) {
+  if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
     ptxas_args.push_back("-O0");
   }
   ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
@@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
    // Find the directory containing libdevice. To avoid searching for it every
    // time, we have a one-element cache, keyed on the module's config's
    // cuda_data_dir.
-    const auto& config_cuda_data_dir =
-        module->config().debug_options().xla_gpu_cuda_data_dir();
-    if (cached_libdevice_dir_.empty() ||
-        cached_cuda_data_dir_ != config_cuda_data_dir) {
-      cached_cuda_data_dir_ = config_cuda_data_dir;
-      cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
+    if (cached_libdevice_dir_.empty()) {
+      cached_libdevice_dir_ = GetLibdeviceDir(module->config());
     }
     libdevice_dir = cached_libdevice_dir_;
   }
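Because GetLibdeviceDir() now folds the user's directory into the candidate list itself and never returns an empty string (it falls back to "."), the cache no longer needs the cached_cuda_data_dir_ key: cached_libdevice_dir_ is computed at most once per compiler instance. The memoization shape in isolation, as a hedged sketch that ignores any locking the real compiler performs:

#include <string>

// Hypothetical one-shot cache: Compute() runs on first use only; every later
// call reuses the stored value, which is never empty thanks to the "."
// fallback.
class LibdeviceDirCache {
 public:
  const std::string& Get() {
    if (cached_dir_.empty()) cached_dir_ = Compute();
    return cached_dir_;
  }

 private:
  static std::string Compute() {
    return "/usr/local/cuda/nvvm/libdevice";  // Placeholder result.
  }
  std::string cached_dir_;
};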
@@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     }
   }
 
-  const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
-      ptx, cc_major, cc_minor,
-      module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
+  const std::vector<uint8> cubin =
+      CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
 
   auto thunk_schedule = absl::make_unique<ThunkSchedule>(
       ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
 
 std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
     const string& ptx, int cc_major, int cc_minor,
-    bool disable_ptx_optimizations) {
+    const HloModuleConfig& hlo_module_config) {
   XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
   tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
   bool inserted;
@@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
   if (inserted) {
     CHECK(!cache_value->compilation_done);
     if (!ptx.empty()) {
-      StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
-          *cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
+      StatusOr<std::vector<uint8>> maybe_cubin =
+          CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
       if (maybe_cubin.ok()) {
         cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
         VLOG(2) << "Compiled PTX size:" << ptx.size()
tensorflow/compiler/xla/service/gpu/nvptx_compiler.h

@@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
   // compiled cubin. If compilation was unsuccessful, returns an empty vector.
   std::vector<uint8> CompilePtxOrGetCachedResult(
       const string& ptx, int cc_major, int cc_minor,
-      bool disable_ptx_optimizations);
+      const HloModuleConfig& hlo_module_config);
 
   // The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
   // -> cubin so we don't recompile the same ptx twice. This is important for
tensorflow/core/BUILD

@@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
     ],
 )
 
-tf_cc_test_gpu(
-    name = "cuda_libdevice_path_test",
-    size = "small",
-    srcs = ["platform/cuda_libdevice_path_test.cc"],
-    linkstatic = tf_kernel_tests_linkstatic(),
-    tags = tf_cuda_tests_tags(),
-    deps = [
-        ":cuda_libdevice_path",
-        ":lib",
-        ":test",
-        ":test_main",
-    ],
-)
-
 tf_cuda_only_cc_test(
     name = "util_cuda_kernel_helper_test",
     srcs = [
@@ -4931,7 +4917,7 @@ filegroup(
 
 cc_library(
     name = "cuda_libdevice_path",
-    srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
+    srcs = tf_additional_libdevice_srcs(),
     hdrs = ["platform/cuda_libdevice_path.h"],
     copts = tf_copts(),
     data = tf_additional_libdevice_data(),
tensorflow/core/platform/cuda_libdevice_path.cc (deleted)

@@ -1,26 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/io/path.h"
-
-namespace tensorflow {
-
-string LibdeviceRoot() {
-  return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
-}
-
-}  // namespace tensorflow
tensorflow/core/platform/cuda_libdevice_path.h

@@ -16,16 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 #define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 
+#include <vector>
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-// Returns the root directory of the CUDA SDK, which contains sub-folders such
-// as bin, lib64, and nvvm.
-string CudaRoot();
-
-// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
-string LibdeviceRoot();
+// Returns, in order of preference, potential locations of the root directory of
+// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
+std::vector<string> CandidateCudaRoots();
 
 }  // namespace tensorflow
 
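The header no longer exposes LibdeviceRoot(); callers that previously consumed the single-path CudaRoot() now iterate the candidate list. A hedged usage sketch against the new header (FirstExistingCudaRoot is a hypothetical caller, not part of the API):

#include "tensorflow/core/platform/cuda_libdevice_path.h"
#include "tensorflow/core/platform/env.h"

// Hypothetical caller: pick the first candidate root that exists on disk.
tensorflow::string FirstExistingCudaRoot() {
  for (const tensorflow::string& root : tensorflow::CandidateCudaRoots()) {
    if (tensorflow::Env::Default()->IsDirectory(root).ok()) return root;
  }
  return ".";  // Mirrors the compiler's last-resort fallback.
}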
tensorflow/core/platform/cuda_libdevice_path_test.cc (deleted)

@@ -1,35 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-
-#if GOOGLE_CUDA
-TEST(CudaLibdevicePathTest, LibdevicePath) {
-  VLOG(2) << "Libdevice root = " << LibdeviceRoot();
-  std::vector<string> libdevice_files;
-  TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
-      io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
-  EXPECT_LT(0, libdevice_files.size());
-}
-#endif
-
-}  // namespace tensorflow
tensorflow/core/platform/default/cuda_libdevice_path.cc

@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/platform/cuda_libdevice_path.h"
 
 #include <stdlib.h>
+#include <vector>
 
 #if !defined(PLATFORM_GOOGLE)
 #include "cuda/cuda_config.h"
@@ -24,9 +25,9 @@ limitations under the License.
 
 namespace tensorflow {
 
-string CudaRoot() {
+std::vector<string> CandidateCudaRoots() {
   VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
-  return TF_CUDA_TOOLKIT_PATH;
+  return {TF_CUDA_TOOLKIT_PATH};
 }
 
 }  // namespace tensorflow
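The default implementation still yields exactly one root, the build-time TF_CUDA_TOOLKIT_PATH; the commit's payoff is the vector-valued signature, which lets a platform (or a follow-up change) return several candidates. A hypothetical multi-root variant might look like:

#include <string>
#include <vector>

// Hypothetical extension, NOT what this commit ships: return several
// plausible CUDA roots in priority order instead of the single
// build-time path.
std::vector<std::string> CandidateCudaRoots() {
  return {
      "/usr/local/cuda",  // Common symlink to the active toolkit.
      "/opt/cuda",        // Alternative install location on some distros.
  };
}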