Merge pull request #21508 from aaroey:fix_allocator_when_engine_device_not_specified

PiperOrigin-RevId: 209647689
TensorFlower Gardener 2018-08-21 13:20:25 -07:00
commit 7cc6abab4e
6 changed files with 228 additions and 26 deletions

tensorflow/contrib/tensorrt/BUILD

@@ -280,6 +280,7 @@ tf_cuda_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core:framework_lite",
+        "//tensorflow/core:gpu_runtime",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -293,6 +294,31 @@ tf_cuda_library(
     ]) + tf_custom_op_library_additional_deps(),
 )
 
+tf_cuda_cc_test(
+    name = "convert_graph_test",
+    size = "medium",
+    srcs = ["convert/convert_graph_test.cc"],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_windows",
+        "nomac",
+    ],
+    deps = [
+        ":trt_conversion",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:direct_session",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
 # Library for the segmenting portion of TensorRT operation creation
 cc_library(
     name = "segment",

tensorflow/contrib/tensorrt/convert/convert_graph.cc

@@ -31,6 +31,9 @@ limitations under the License.
 #include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
 #include "tensorflow/contrib/tensorrt/segment/segment.h"
 #include "tensorflow/contrib/tensorrt/test/utils.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -772,15 +775,38 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
     const ConversionParams& params, const EngineInfo& engine) {
   int cuda_device_id = -1;
   tensorflow::Allocator* dev_allocator = nullptr;
-  if (params.cluster) {
+  if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
+      engine.device.empty()) {
+    // If device is not set, use the first found GPU device for the conversion.
+    for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
+      TfGpuId tf_gpu_id(tf_gpu_id_value);
+      CudaGpuId cuda_gpu_id;
+      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      if (s.ok()) {
+        VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
+                << cuda_gpu_id.value();
+        cuda_device_id = cuda_gpu_id.value();
+        GPUOptions gpu_options;
+        // If the TF to Cuda gpu id mapping exist, the device and corresponding
+        // allocator must have been initialized already, so the
+        // GetGPUAllocator() call won't create a new allocator.
+        dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
+            gpu_options, tf_gpu_id, 1);
+        break;
+      }
+      LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
+                 << s;
+    }
+    return std::make_pair(cuda_device_id, dev_allocator);
+  }
+
+  // Use the device requested by the engine.
+  auto device_set = params.cluster->GetDeviceSet();
   std::vector<tensorflow::Device*> devices;
-  if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
-    DeviceNameUtils::ParsedName parsed_name;
-    if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
-        parsed_name.has_id) {
-      params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
-                                                          &devices);
-    }
+  DeviceNameUtils::ParsedName parsed_name;
+  if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
+      parsed_name.has_id) {
+    device_set->FindMatchingDevices(parsed_name, &devices);
   }
   if (!devices.empty()) {
     if (devices.size() > 1) {
@@ -799,7 +825,6 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
     LOG(WARNING) << "Cluster is set but device '" << engine.device
                  << "' is not found in the cluster";
   }
-  }
   return std::make_pair(cuda_device_id, dev_allocator);
 }
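
The heart of the fix is the new early-return branch above: when no cluster or device set is available, or the engine has no device assigned, conversion now probes TF GPU ids in order and takes the first one whose TF-to-CUDA mapping exists, reusing that device's already-created allocator instead of failing. A standalone sketch of the same probing pattern (hypothetical helper name FirstInitializedGpu; assumes a GPU-enabled TensorFlow build at roughly this revision, where these internal headers and GPUProcessState live):

#include <utility>

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace tensorflow {

// Returns {cuda_device_id, allocator}, or {-1, nullptr} if no TF GPU has
// been created in this process yet.
std::pair<int, Allocator*> FirstInitializedGpu() {
  for (int i = 0; i < 100; ++i) {  // Same hard-coded probe bound as above.
    TfGpuId tf_gpu_id(i);
    CudaGpuId cuda_gpu_id;
    if (GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id).ok()) {
      // The TF-to-CUDA mapping exists only for devices that were already
      // created, so GetGPUAllocator() returns the existing allocator
      // rather than creating a new one.
      Allocator* allocator = GPUProcessState::singleton()->GetGPUAllocator(
          GPUOptions(), tf_gpu_id, /*total_bytes=*/1);
      return {cuda_gpu_id.value(), allocator};
    }
  }
  return {-1, nullptr};
}

}  // namespace tensorflow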

tensorflow/contrib/tensorrt/convert/convert_graph.h

@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -84,6 +85,11 @@ std::vector<int> GetLinkedTensorRTVersion();
 // Return runtime time TensorRT library version information.
 std::vector<int> GetLoadedTensorRTVersion();
 
+// Helper method for the conversion, expose for testing.
+std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
+    const ConversionParams& params, const EngineInfo& engine);
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow

tensorflow/contrib/tensorrt/convert/convert_graph_test.cc (new file)

@@ -0,0 +1,140 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"

#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
#include "tensorflow/core/common_runtime/device_mgr.h"
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
#include "tensorflow/core/public/session.h"

#if GOOGLE_CUDA
#if GOOGLE_TENSORRT

namespace tensorflow {
namespace tensorrt {
namespace convert {

class FakeCluster : public grappler::Cluster {
 public:
  FakeCluster() : Cluster(0) {}

  void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }

  const DeviceSet* GetDeviceSet() const override { return device_set_; }

  string type() const override { return ""; }
  Status Provision() override { return Status::OK(); }
  Status Initialize(const grappler::GrapplerItem& item) override {
    return Status::OK();
  }
  Status Run(const GraphDef& graph_def,
             const std::vector<std::pair<string, Tensor>>& feed,
             const std::vector<string>& fetch,
             RunMetadata* metadata) override {
    return Status::OK();
  }

 private:
  const DeviceSet* device_set_;
};

TEST(ConvertGraphTest, GetDeviceAndAllocator) {
  ConversionParams params;
  EngineInfo engine_info;
  {
    // params.cluster is not set, and no gpu device is available.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }

  // Create a session with two (virtual) gpu device.
  SessionOptions options;
  ConfigProto* config = &options.config;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  auto virtual_devices =
      gpu_options->mutable_experimental()->add_virtual_devices();
  virtual_devices->add_memory_limit_mb(200);
  virtual_devices->add_memory_limit_mb(200);
  std::unique_ptr<Session> session(NewSession(options));

  {
    // params.cluster is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  FakeCluster cluster;
  params.cluster = &cluster;
  {
    // params.cluster->GetDeviceSet() returns null, should find and return first
    // gpu id and corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  // Build the DeviceSet.
  DeviceSet device_set;
  const DeviceMgr* device_mgr = nullptr;
  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
  for (auto d : device_mgr->ListDevices()) {
    device_set.AddDevice(d);
  }
  cluster.SetDeviceSet(&device_set);
  {
    // engine_info.device is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:1";
  {
    // Set to use second device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_1_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:3";
  {
    // Set to use nonexistent device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
}

}  // namespace convert
}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_TENSORRT
#endif  // GOOGLE_CUDA
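
A note on the test's setup: rather than requiring two physical GPUs, it splits one GPU into two 200 MB virtual devices through the GPUOptions.experimental.virtual_devices config. That is also why the "/GPU:1" case expects result.first to stay 0 while the allocator is GPU_1_bfc: both virtual devices are backed by the same physical CUDA device.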

tensorflow/contrib/tensorrt/convert/convert_nodes.cc

@@ -77,6 +77,10 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
+// TODO(aaroey): put these constants into some class.
+const char* const kInputPHName = "TensorRTInputPH_";
+const char* const kOutputPHName = "TensorRTOutputPH_";
+
 namespace convert {
 using ::tensorflow::str_util::Split;
 using ::tensorflow::strings::StrAppend;

tensorflow/contrib/tensorrt/convert/convert_nodes.h

@@ -36,8 +36,9 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
-static const char* kInputPHName = "TensorRTInputPH_";
-static const char* kOutputPHName = "TensorRTOutputPH_";
+extern const char* const kInputPHName;
+extern const char* const kOutputPHName;
+
 namespace convert {
 
 struct EngineConnection {
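
The change in the last two files fixes a classic C++ pitfall: a static const char* defined in a header gives every translation unit that includes it its own copy of the pointer, so the "constant" is duplicated and its address differs across files. Declaring the names extern in the header and defining them once in the .cc file (as this commit does) yields a single program-wide object. The pattern in isolation (hypothetical file names):

// placeholder_names.h (hypothetical): declaration only; every includer
// refers to the same object.
extern const char* const kInputPHName;

// placeholder_names.cc (hypothetical): the single definition linked into
// the whole program.
const char* const kInputPHName = "TensorRTInputPH_";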