Merge pull request #21508 from aaroey:fix_allocator_when_engine_device_not_specified
PiperOrigin-RevId: 209647689
This commit is contained in:
commit
7cc6abab4e
@ -280,6 +280,7 @@ tf_cuda_library(
|
|||||||
"//tensorflow/core/grappler:grappler_item",
|
"//tensorflow/core/grappler:grappler_item",
|
||||||
"//tensorflow/core/grappler:utils",
|
"//tensorflow/core/grappler:utils",
|
||||||
"//tensorflow/core:framework_lite",
|
"//tensorflow/core:framework_lite",
|
||||||
|
"//tensorflow/core:gpu_runtime",
|
||||||
"//tensorflow/core:graph",
|
"//tensorflow/core:graph",
|
||||||
"//tensorflow/core:lib",
|
"//tensorflow/core:lib",
|
||||||
"//tensorflow/core:lib_internal",
|
"//tensorflow/core:lib_internal",
|
||||||
@ -293,6 +294,31 @@ tf_cuda_library(
|
|||||||
]) + tf_custom_op_library_additional_deps(),
|
]) + tf_custom_op_library_additional_deps(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tf_cuda_cc_test(
|
||||||
|
name = "convert_graph_test",
|
||||||
|
size = "medium",
|
||||||
|
srcs = ["convert/convert_graph_test.cc"],
|
||||||
|
tags = [
|
||||||
|
"no_cuda_on_cpu_tap",
|
||||||
|
"no_windows",
|
||||||
|
"nomac",
|
||||||
|
],
|
||||||
|
deps = [
|
||||||
|
":trt_conversion",
|
||||||
|
"//tensorflow/core/grappler:grappler_item",
|
||||||
|
"//tensorflow/core/grappler/clusters:cluster",
|
||||||
|
"//tensorflow/core:core_cpu",
|
||||||
|
"//tensorflow/core:core_cpu_base",
|
||||||
|
"//tensorflow/core:direct_session",
|
||||||
|
"//tensorflow/core:lib",
|
||||||
|
"//tensorflow/core:protos_all_cc",
|
||||||
|
"//tensorflow/core:test",
|
||||||
|
"//tensorflow/core:test_main",
|
||||||
|
] + if_tensorrt([
|
||||||
|
"@local_config_tensorrt//:nv_infer",
|
||||||
|
]),
|
||||||
|
)
|
||||||
|
|
||||||
# Library for the segmenting portion of TensorRT operation creation
|
# Library for the segmenting portion of TensorRT operation creation
|
||||||
cc_library(
|
cc_library(
|
||||||
name = "segment",
|
name = "segment",
|
||||||
|
@ -31,6 +31,9 @@ limitations under the License.
|
|||||||
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
|
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
|
||||||
#include "tensorflow/contrib/tensorrt/segment/segment.h"
|
#include "tensorflow/contrib/tensorrt/segment/segment.h"
|
||||||
#include "tensorflow/contrib/tensorrt/test/utils.h"
|
#include "tensorflow/contrib/tensorrt/test/utils.h"
|
||||||
|
#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
|
||||||
|
#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
|
||||||
|
#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
|
||||||
#include "tensorflow/core/framework/function.h"
|
#include "tensorflow/core/framework/function.h"
|
||||||
#include "tensorflow/core/framework/graph_to_functiondef.h"
|
#include "tensorflow/core/framework/graph_to_functiondef.h"
|
||||||
#include "tensorflow/core/framework/node_def_builder.h"
|
#include "tensorflow/core/framework/node_def_builder.h"
|
||||||
@ -772,15 +775,38 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
|
|||||||
const ConversionParams& params, const EngineInfo& engine) {
|
const ConversionParams& params, const EngineInfo& engine) {
|
||||||
int cuda_device_id = -1;
|
int cuda_device_id = -1;
|
||||||
tensorflow::Allocator* dev_allocator = nullptr;
|
tensorflow::Allocator* dev_allocator = nullptr;
|
||||||
if (params.cluster) {
|
if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
|
||||||
|
engine.device.empty()) {
|
||||||
|
// If device is not set, use the first found GPU device for the conversion.
|
||||||
|
for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
|
||||||
|
TfGpuId tf_gpu_id(tf_gpu_id_value);
|
||||||
|
CudaGpuId cuda_gpu_id;
|
||||||
|
Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
|
||||||
|
if (s.ok()) {
|
||||||
|
VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
|
||||||
|
<< cuda_gpu_id.value();
|
||||||
|
cuda_device_id = cuda_gpu_id.value();
|
||||||
|
GPUOptions gpu_options;
|
||||||
|
// If the TF to Cuda gpu id mapping exists, the device and corresponding
|
||||||
|
// allocator must have been initialized already, so the
|
||||||
|
// GetGPUAllocator() call won't create a new allocator.
|
||||||
|
dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
|
||||||
|
gpu_options, tf_gpu_id, 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
|
||||||
|
<< s;
|
||||||
|
}
|
||||||
|
return std::make_pair(cuda_device_id, dev_allocator);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the device requested by the engine.
|
||||||
|
auto device_set = params.cluster->GetDeviceSet();
|
||||||
std::vector<tensorflow::Device*> devices;
|
std::vector<tensorflow::Device*> devices;
|
||||||
if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
|
|
||||||
DeviceNameUtils::ParsedName parsed_name;
|
DeviceNameUtils::ParsedName parsed_name;
|
||||||
if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
|
if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
|
||||||
parsed_name.has_id) {
|
parsed_name.has_id) {
|
||||||
params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
|
device_set->FindMatchingDevices(parsed_name, &devices);
|
||||||
&devices);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (!devices.empty()) {
|
if (!devices.empty()) {
|
||||||
if (devices.size() > 1) {
|
if (devices.size() > 1) {
|
||||||
@ -799,7 +825,6 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
|
|||||||
LOG(WARNING) << "Cluster is set but device '" << engine.device
|
LOG(WARNING) << "Cluster is set but device '" << engine.device
|
||||||
<< "' is not found in the cluster";
|
<< "' is not found in the cluster";
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return std::make_pair(cuda_device_id, dev_allocator);
|
return std::make_pair(cuda_device_id, dev_allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
|
||||||
#include "tensorflow/core/framework/graph.pb.h"
|
#include "tensorflow/core/framework/graph.pb.h"
|
||||||
#include "tensorflow/core/grappler/clusters/cluster.h"
|
#include "tensorflow/core/grappler/clusters/cluster.h"
|
||||||
#include "tensorflow/core/grappler/costs/graph_properties.h"
|
#include "tensorflow/core/grappler/costs/graph_properties.h"
|
||||||
@ -84,6 +85,11 @@ std::vector<int> GetLinkedTensorRTVersion();
|
|||||||
|
|
||||||
// Return runtime TensorRT library version information.
|
// Return runtime TensorRT library version information.
|
||||||
std::vector<int> GetLoadedTensorRTVersion();
|
std::vector<int> GetLoadedTensorRTVersion();
|
||||||
|
|
||||||
|
// Helper method for the conversion, exposed for testing.
|
||||||
|
std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
|
||||||
|
const ConversionParams& params, const EngineInfo& engine);
|
||||||
|
|
||||||
} // namespace convert
|
} // namespace convert
|
||||||
} // namespace tensorrt
|
} // namespace tensorrt
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
140
tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
Normal file
140
tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==============================================================================*/
|
||||||
|
|
||||||
|
#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
|
||||||
|
|
||||||
|
#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
|
||||||
|
#include "tensorflow/core/common_runtime/device_mgr.h"
|
||||||
|
#include "tensorflow/core/common_runtime/device_set.h"
|
||||||
|
#include "tensorflow/core/grappler/clusters/cluster.h"
|
||||||
|
#include "tensorflow/core/grappler/grappler_item.h"
|
||||||
|
#include "tensorflow/core/lib/core/status.h"
|
||||||
|
#include "tensorflow/core/lib/core/status_test_util.h"
|
||||||
|
#include "tensorflow/core/platform/test.h"
|
||||||
|
#include "tensorflow/core/protobuf/config.pb.h" // NOLINT
|
||||||
|
#include "tensorflow/core/public/session.h"
|
||||||
|
|
||||||
|
#if GOOGLE_CUDA
|
||||||
|
#if GOOGLE_TENSORRT
|
||||||
|
|
||||||
|
namespace tensorflow {
|
||||||
|
namespace tensorrt {
|
||||||
|
namespace convert {
|
||||||
|
|
||||||
|
class FakeCluster : public grappler::Cluster {
|
||||||
|
public:
|
||||||
|
FakeCluster() : Cluster(0) {}
|
||||||
|
|
||||||
|
void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }
|
||||||
|
|
||||||
|
const DeviceSet* GetDeviceSet() const override { return device_set_; }
|
||||||
|
|
||||||
|
string type() const override { return ""; }
|
||||||
|
Status Provision() override { return Status::OK(); }
|
||||||
|
Status Initialize(const grappler::GrapplerItem& item) override {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
Status Run(const GraphDef& graph_def,
|
||||||
|
const std::vector<std::pair<string, Tensor>>& feed,
|
||||||
|
const std::vector<string>& fetch,
|
||||||
|
RunMetadata* metadata) override {
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const DeviceSet* device_set_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Exercises GetDeviceAndAllocator() across its device-selection paths:
//   1. no cluster and no GPU registered yet        -> (-1, nullptr)
//   2. no cluster, GPUs registered by a session    -> first GPU
//   3. cluster whose GetDeviceSet() is null        -> first GPU
//   4. cluster with devices, engine device empty   -> first GPU
//   5. engine device names an existing GPU         -> that GPU's allocator
//   6. engine device names a nonexistent GPU       -> (-1, nullptr)
// The steps are order-dependent: step 1 must run before the session is
// created, and later steps reuse the cluster/device set built earlier.
TEST(ConvertGraphTest, GetDeviceAndAllocator) {
  ConversionParams params;
  EngineInfo engine_info;
  {
    // params.cluster is not set, and no gpu device is available.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }

  // Create a session with two (virtual) gpu devices.
  SessionOptions options;
  ConfigProto* config = &options.config;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  auto virtual_devices =
      gpu_options->mutable_experimental()->add_virtual_devices();
  virtual_devices->add_memory_limit_mb(200);
  virtual_devices->add_memory_limit_mb(200);
  std::unique_ptr<Session> session(NewSession(options));
  // Stop here on failure: the session is dereferenced further down.
  ASSERT_TRUE(session != nullptr);

  {
    // params.cluster is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    // ASSERT (not EXPECT) so a null allocator fails the test instead of
    // crashing on the dereference below.
    ASSERT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  FakeCluster cluster;
  params.cluster = &cluster;
  {
    // params.cluster->GetDeviceSet() returns null, should find and return first
    // gpu id and corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    ASSERT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  // Build the DeviceSet.
  DeviceSet device_set;
  const DeviceMgr* device_mgr = nullptr;
  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
  for (auto d : device_mgr->ListDevices()) {
    device_set.AddDevice(d);
  }
  cluster.SetDeviceSet(&device_set);
  {
    // engine_info.device is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    ASSERT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:1";
  {
    // Set to use second device. The expected CUDA device id is still 0, while
    // the allocator returned is the one belonging to TF GPU 1.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    ASSERT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_1_bfc", result.second->Name());
  }

  engine_info.device = "/GPU:3";
  {
    // Set to use nonexistent device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
}
|
||||||
|
|
||||||
|
} // namespace convert
|
||||||
|
} // namespace tensorrt
|
||||||
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
#endif // GOOGLE_TENSORRT
|
||||||
|
#endif // GOOGLE_CUDA
|
@ -77,6 +77,10 @@ limitations under the License.
|
|||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tensorrt {
|
namespace tensorrt {
|
||||||
|
// TODO(aaroey): put these constants into some class.
|
||||||
|
const char* const kInputPHName = "TensorRTInputPH_";
|
||||||
|
const char* const kOutputPHName = "TensorRTOutputPH_";
|
||||||
|
|
||||||
namespace convert {
|
namespace convert {
|
||||||
using ::tensorflow::str_util::Split;
|
using ::tensorflow::str_util::Split;
|
||||||
using ::tensorflow::strings::StrAppend;
|
using ::tensorflow::strings::StrAppend;
|
||||||
|
@ -36,8 +36,9 @@ limitations under the License.
|
|||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace tensorrt {
|
namespace tensorrt {
|
||||||
static const char* kInputPHName = "TensorRTInputPH_";
|
extern const char* const kInputPHName;
|
||||||
static const char* kOutputPHName = "TensorRTOutputPH_";
|
extern const char* const kOutputPHName;
|
||||||
|
|
||||||
namespace convert {
|
namespace convert {
|
||||||
|
|
||||||
struct EngineConnection {
|
struct EngineConnection {
|
||||||
|
Loading…
Reference in New Issue
Block a user