Improve CUDA runtime dependencies search.
tensorflow::CudaRoot() is replaced by tensorflow::CandidateCudaRoots(), which may return multiple possible locations of the CUDA root, in order of preference. PiperOrigin-RevId: 225091635
This commit is contained in:
parent
33bc0b9788
commit
bafb874798
@ -108,27 +108,33 @@ namespace {
|
||||
|
||||
namespace tracing = tensorflow::tracing;
|
||||
|
||||
// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
|
||||
// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
|
||||
// HloModule being compiled.
|
||||
string GetLibdeviceDir(const string& config_cuda_data_dir) {
|
||||
std::vector<string> potential_libdevice_dirs;
|
||||
if (!config_cuda_data_dir.empty()) {
|
||||
potential_libdevice_dirs.push_back(config_cuda_data_dir);
|
||||
}
|
||||
potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
|
||||
// Returns a vector of potential locations of the CUDA root directory.
|
||||
std::vector<string> GetCudaRootCandidates(
|
||||
const HloModuleConfig& hlo_module_config) {
|
||||
std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
|
||||
|
||||
// Tries all potential libdevice directories in the order they are inserted.
|
||||
// Returns the first directory that exists in the file system.
|
||||
for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
|
||||
if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
|
||||
VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
|
||||
return potential_libdevice_dir;
|
||||
// CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
|
||||
// highest priority.
|
||||
string xla_gpu_cuda_data_dir =
|
||||
hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
|
||||
if (!xla_gpu_cuda_data_dir.empty()) {
|
||||
potential_cuda_roots.insert(potential_cuda_roots.begin(),
|
||||
xla_gpu_cuda_data_dir);
|
||||
}
|
||||
return potential_cuda_roots;
|
||||
}
|
||||
|
||||
// Returns the directory containing nvvm libdevice files.
|
||||
string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
|
||||
for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
|
||||
string libdevice_dir =
|
||||
tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
|
||||
VLOG(2) << "Looking for libdevice at " << libdevice_dir;
|
||||
if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
|
||||
VLOG(2) << "Found libdevice dir " << libdevice_dir;
|
||||
return libdevice_dir;
|
||||
}
|
||||
VLOG(2) << "Unable to find potential libdevice dir "
|
||||
<< potential_libdevice_dir;
|
||||
}
|
||||
|
||||
LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
|
||||
// Last resort: maybe in the current folder.
|
||||
return ".";
|
||||
@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
|
||||
|
||||
// Compiles the given PTX string using ptxas and returns the resulting machine
|
||||
// code (i.e. a cubin) as a byte array.
|
||||
StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
|
||||
int cc_minor,
|
||||
bool disable_ptx_optimizations) {
|
||||
StatusOr<std::vector<uint8>> CompilePtx(
|
||||
const string& ptx, int cc_major, int cc_minor,
|
||||
const HloModuleConfig& hlo_module_config) {
|
||||
tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
|
||||
const string ptxas_path =
|
||||
tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
|
||||
VLOG(2) << "Checking ptxas at " << ptxas_path;
|
||||
auto env = tensorflow::Env::Default();
|
||||
string ptxas_path;
|
||||
for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
|
||||
ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
|
||||
VLOG(2) << "Looking for ptxas at " << ptxas_path;
|
||||
if (env->FileExists(ptxas_path).ok()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
|
||||
VLOG(2) << "Using ptxas at " << ptxas_path;
|
||||
|
||||
@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
|
||||
if (VLOG_IS_ON(2)) {
|
||||
ptxas_args.push_back("-v");
|
||||
}
|
||||
if (disable_ptx_optimizations) {
|
||||
if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
|
||||
ptxas_args.push_back("-O0");
|
||||
}
|
||||
ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
|
||||
@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
|
||||
// Find the directory containing libdevice. To avoid searching for it every
|
||||
// time, we have a one-element cache, keyed on the module's config's
|
||||
// cuda_data_dir.
|
||||
const auto& config_cuda_data_dir =
|
||||
module->config().debug_options().xla_gpu_cuda_data_dir();
|
||||
if (cached_libdevice_dir_.empty() ||
|
||||
cached_cuda_data_dir_ != config_cuda_data_dir) {
|
||||
cached_cuda_data_dir_ = config_cuda_data_dir;
|
||||
cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
|
||||
if (cached_libdevice_dir_.empty()) {
|
||||
cached_libdevice_dir_ = GetLibdeviceDir(module->config());
|
||||
}
|
||||
libdevice_dir = cached_libdevice_dir_;
|
||||
}
|
||||
@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
|
||||
ptx, cc_major, cc_minor,
|
||||
module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
|
||||
const std::vector<uint8> cubin =
|
||||
CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
|
||||
|
||||
auto thunk_schedule = absl::make_unique<ThunkSchedule>(
|
||||
ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
|
||||
@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
|
||||
|
||||
std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
|
||||
const string& ptx, int cc_major, int cc_minor,
|
||||
bool disable_ptx_optimizations) {
|
||||
const HloModuleConfig& hlo_module_config) {
|
||||
XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
|
||||
tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
|
||||
bool inserted;
|
||||
@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
|
||||
if (inserted) {
|
||||
CHECK(!cache_value->compilation_done);
|
||||
if (!ptx.empty()) {
|
||||
StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
|
||||
*cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
|
||||
StatusOr<std::vector<uint8>> maybe_cubin =
|
||||
CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
|
||||
if (maybe_cubin.ok()) {
|
||||
cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
|
||||
VLOG(2) << "Compiled PTX size:" << ptx.size()
|
||||
|
@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
|
||||
// compiled cubin. If compilation was unsuccessful, returns an empty vector.
|
||||
std::vector<uint8> CompilePtxOrGetCachedResult(
|
||||
const string& ptx, int cc_major, int cc_minor,
|
||||
bool disable_ptx_optimizations);
|
||||
const HloModuleConfig& hlo_module_config);
|
||||
|
||||
// The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
|
||||
// -> cubin so we don't recompile the same ptx twice. This is important for
|
||||
|
@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
|
||||
],
|
||||
)
|
||||
|
||||
tf_cc_test_gpu(
|
||||
name = "cuda_libdevice_path_test",
|
||||
size = "small",
|
||||
srcs = ["platform/cuda_libdevice_path_test.cc"],
|
||||
linkstatic = tf_kernel_tests_linkstatic(),
|
||||
tags = tf_cuda_tests_tags(),
|
||||
deps = [
|
||||
":cuda_libdevice_path",
|
||||
":lib",
|
||||
":test",
|
||||
":test_main",
|
||||
],
|
||||
)
|
||||
|
||||
tf_cuda_only_cc_test(
|
||||
name = "util_cuda_kernel_helper_test",
|
||||
srcs = [
|
||||
@ -4931,7 +4917,7 @@ filegroup(
|
||||
|
||||
cc_library(
|
||||
name = "cuda_libdevice_path",
|
||||
srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
|
||||
srcs = tf_additional_libdevice_srcs(),
|
||||
hdrs = ["platform/cuda_libdevice_path.h"],
|
||||
copts = tf_copts(),
|
||||
data = tf_additional_libdevice_data(),
|
||||
|
@ -1,26 +0,0 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/platform/cuda_libdevice_path.h"
|
||||
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
string LibdeviceRoot() {
|
||||
return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
@ -16,16 +16,14 @@ limitations under the License.
|
||||
#ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
|
||||
#define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
|
||||
|
||||
#include <vector>
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
// Returns the root directory of the CUDA SDK, which contains sub-folders such
|
||||
// as bin, lib64, and nvvm.
|
||||
string CudaRoot();
|
||||
|
||||
// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
|
||||
string LibdeviceRoot();
|
||||
// Returns, in order of preference, potential locations of the root directory of
|
||||
// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
|
||||
std::vector<string> CandidateCudaRoots();
|
||||
|
||||
} // namespace tensorflow
|
||||
|
||||
|
@ -1,35 +0,0 @@
|
||||
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/platform/cuda_libdevice_path.h"
|
||||
|
||||
#include "tensorflow/core/lib/core/status_test_util.h"
|
||||
#include "tensorflow/core/lib/io/path.h"
|
||||
#include "tensorflow/core/platform/env.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
#if GOOGLE_CUDA
|
||||
TEST(CudaLibdevicePathTest, LibdevicePath) {
|
||||
VLOG(2) << "Libdevice root = " << LibdeviceRoot();
|
||||
std::vector<string> libdevice_files;
|
||||
TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
|
||||
io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
|
||||
EXPECT_LT(0, libdevice_files.size());
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace tensorflow
|
@ -16,6 +16,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/cuda_libdevice_path.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <vector>
|
||||
|
||||
#if !defined(PLATFORM_GOOGLE)
|
||||
#include "cuda/cuda_config.h"
|
||||
@ -24,9 +25,9 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
string CudaRoot() {
|
||||
std::vector<string> CandidateCudaRoots() {
|
||||
VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
|
||||
return TF_CUDA_TOOLKIT_PATH;
|
||||
return {TF_CUDA_TOOLKIT_PATH};
|
||||
}
|
||||
|
||||
} // namespace tensorflow
|
||||
|
Loading…
Reference in New Issue
Block a user