commit 7cc6abab4e
Merge pull request #21508 from aaroey:fix_allocator_when_engine_device_not_specified

PiperOrigin-RevId: 209647689
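In short: GetDeviceAndAllocator() used to return {-1, nullptr} unless a cluster with a DeviceSet was provided and the engine named a device. With this change, when params.cluster is null, the cluster has no DeviceSet, or engine.device is empty, the conversion falls back to the first TF GPU registered in the process (and that GPU's already-initialized allocator); the cluster's DeviceSet is consulted only when the engine names a specific device. A new unit test, convert/convert_graph_test.cc, exercises both paths.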
tensorflow/contrib/tensorrt/BUILD
@@ -280,6 +280,7 @@ tf_cuda_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core:framework_lite",
+        "//tensorflow/core:gpu_runtime",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -293,6 +294,31 @@ tf_cuda_library(
     ]) + tf_custom_op_library_additional_deps(),
 )
 
+tf_cuda_cc_test(
+    name = "convert_graph_test",
+    size = "medium",
+    srcs = ["convert/convert_graph_test.cc"],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_windows",
+        "nomac",
+    ],
+    deps = [
+        ":trt_conversion",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:direct_session",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
 # Library for the segmenting portion of TensorRT operation creation
 cc_library(
     name = "segment",
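With this target in place, the test should be runnable on a CUDA-enabled build with something along the lines of bazel test --config=cuda //tensorflow/contrib/tensorrt:convert_graph_test (the exact invocation is an assumption, not part of the patch; flags depend on the local build configuration). The no_cuda_on_cpu_tap, no_windows, and nomac tags keep the test off platforms where TensorRT is not available.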
tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -31,6 +31,9 @@ limitations under the License.
 #include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
 #include "tensorflow/contrib/tensorrt/segment/segment.h"
 #include "tensorflow/contrib/tensorrt/test/utils.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -772,15 +775,38 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
     const ConversionParams& params, const EngineInfo& engine) {
   int cuda_device_id = -1;
   tensorflow::Allocator* dev_allocator = nullptr;
-  if (params.cluster) {
-    std::vector<tensorflow::Device*> devices;
-    if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
-      DeviceNameUtils::ParsedName parsed_name;
-      if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
-          parsed_name.has_id) {
-        params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
-                                                            &devices);
-      }
-    }
-    if (!devices.empty()) {
-      if (devices.size() > 1) {
+  if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
+      engine.device.empty()) {
+    // If device is not set, use the first found GPU device for the conversion.
+    for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
+      TfGpuId tf_gpu_id(tf_gpu_id_value);
+      CudaGpuId cuda_gpu_id;
+      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      if (s.ok()) {
+        VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
+                << cuda_gpu_id.value();
+        cuda_device_id = cuda_gpu_id.value();
+        GPUOptions gpu_options;
+        // If the TF to Cuda gpu id mapping exists, the device and
+        // corresponding allocator must have been initialized already, so the
+        // GetGPUAllocator() call won't create a new allocator.
+        dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
+            gpu_options, tf_gpu_id, 1);
+        break;
+      }
+      LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
+                 << s;
+    }
+    return std::make_pair(cuda_device_id, dev_allocator);
+  }
+
+  // Use the device requested by the engine.
+  auto device_set = params.cluster->GetDeviceSet();
+  std::vector<tensorflow::Device*> devices;
+  DeviceNameUtils::ParsedName parsed_name;
+  if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
+      parsed_name.has_id) {
+    device_set->FindMatchingDevices(parsed_name, &devices);
+  }
+  if (!devices.empty()) {
+    if (devices.size() > 1) {
@@ -799,7 +825,6 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
-      LOG(WARNING) << "Cluster is set but device '" << engine.device
-                   << "' is not found in the cluster";
-    }
-  }
+    LOG(WARNING) << "Cluster is set but device '" << engine.device
+                 << "' is not found in the cluster";
+  }
   return std::make_pair(cuda_device_id, dev_allocator);
 }
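The effect of the new fallback is easiest to see from a call site. Below is a minimal sketch, not taken from the patch, assuming the enclosing tensorflow::tensorrt::convert namespaces; params and engine stand for caller-supplied values:

    // Fallback path: no cluster, no DeviceSet, or no engine device name.
    ConversionParams params;  // params.cluster deliberately left unset
    EngineInfo engine;        // engine.device deliberately left empty
    std::pair<int, tensorflow::Allocator*> result =
        GetDeviceAndAllocator(params, engine);
    if (result.first == -1) {
      // No TF GPU is registered in this process; the caller has to skip or
      // defer building the TensorRT engine.
    } else {
      // result.first is the CUDA id of the first TF GPU found, and
      // result.second is its already-initialized allocator (e.g. "GPU_0_bfc").
    }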
tensorflow/contrib/tensorrt/convert/convert_graph.h
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
 #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
 #include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -84,6 +85,11 @@ std::vector<int> GetLinkedTensorRTVersion();
 
 // Return runtime TensorRT library version information.
 std::vector<int> GetLoadedTensorRTVersion();
+
+// Helper method for the conversion, exposed for testing.
+std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
+    const ConversionParams& params, const EngineInfo& engine);
 
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
tensorflow/contrib/tensorrt/convert/convert_graph_test.cc (new file, 140 lines)
@@ -0,0 +1,140 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"

#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
#include "tensorflow/core/public/session.h"

#if GOOGLE_CUDA
#if GOOGLE_TENSORRT

namespace tensorflow {
namespace tensorrt {
namespace convert {

class FakeCluster : public grappler::Cluster {
 public:
  FakeCluster() : Cluster(0) {}

  void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }

  const DeviceSet* GetDeviceSet() const override { return device_set_; }

  string type() const override { return ""; }
  Status Provision() override { return Status::OK(); }
  Status Initialize(const grappler::GrapplerItem& item) override {
    return Status::OK();
  }
  Status Run(const GraphDef& graph_def,
             const std::vector<std::pair<string, Tensor>>& feed,
             const std::vector<string>& fetch,
             RunMetadata* metadata) override {
    return Status::OK();
  }

 private:
  const DeviceSet* device_set_ = nullptr;
};

TEST(ConvertGraphTest, GetDeviceAndAllocator) {
  ConversionParams params;
  EngineInfo engine_info;
  {
    // params.cluster is not set, and no GPU device has been created yet.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }

  // Create a session with two (virtual) GPU devices.
  SessionOptions options;
  ConfigProto* config = &options.config;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  auto virtual_devices =
      gpu_options->mutable_experimental()->add_virtual_devices();
  virtual_devices->add_memory_limit_mb(200);
  virtual_devices->add_memory_limit_mb(200);
  std::unique_ptr<Session> session(NewSession(options));

  {
    // params.cluster is not set; should find and return the first GPU id and
    // the corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  FakeCluster cluster;
  params.cluster = &cluster;
  {
    // params.cluster->GetDeviceSet() returns null; should find and return the
    // first GPU id and the corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  // Build the DeviceSet.
  DeviceSet device_set;
  const DeviceMgr* device_mgr = nullptr;
  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
  for (auto d : device_mgr->ListDevices()) {
    device_set.AddDevice(d);
  }
  cluster.SetDeviceSet(&device_set);
  {
    // engine_info.device is not set; should find and return the first GPU id
    // and the corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:1";
  {
    // Set to use the second device. Both virtual devices are backed by the
    // same physical GPU, so the CUDA device id stays 0.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_1_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:3";
  {
    // Set to use a nonexistent device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
}

}  // namespace convert
}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_TENSORRT
#endif  // GOOGLE_CUDA
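A detail worth calling out in the test above: with engine_info.device = "/GPU:1" the expected CUDA id is still 0. Both 200 MB virtual devices are carved out of the same physical GPU, so TF GPU 1 maps back to CUDA device 0 while keeping its own BFC allocator ("GPU_1_bfc"). A minimal sketch of the mapping the test relies on, using the same GpuIdManager API the patch calls:

    // Assuming a session configured with two virtual devices on one GPU:
    TfGpuId tf_gpu_id(1);  // the second virtual TF device
    CudaGpuId cuda_gpu_id;
    TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id));
    CHECK_EQ(0, cuda_gpu_id.value());  // still backed by CUDA device 0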
tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -77,6 +77,10 @@ limitations under the License.
 
 namespace tensorflow {
 namespace tensorrt {
+// TODO(aaroey): put these constants into some class.
+const char* const kInputPHName = "TensorRTInputPH_";
+const char* const kOutputPHName = "TensorRTOutputPH_";
+
 namespace convert {
 using ::tensorflow::str_util::Split;
 using ::tensorflow::strings::StrAppend;
tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -36,8 +36,9 @@ limitations under the License.
 
 namespace tensorflow {
 namespace tensorrt {
-static const char* kInputPHName = "TensorRTInputPH_";
-static const char* kOutputPHName = "TensorRTOutputPH_";
+extern const char* const kInputPHName;
+extern const char* const kOutputPHName;
 
 namespace convert {
 
 struct EngineConnection {
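The constant change above is the usual fix for string constants shared through a header: a namespace-scope static const char* gives every translation unit that includes the header its own copy of the pointer (and leaves the pointer itself non-const), while an extern declaration with a single out-of-line definition yields one shared, fully-const object with one address. A standalone sketch of the pattern, with a hypothetical name (kExamplePHName is not from the patch):

    // example.h -- one declaration, visible to every translation unit.
    extern const char* const kExamplePHName;

    // example.cc -- the single definition.
    const char* const kExamplePHName = "ExamplePH_";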