diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD
index 26236a0435c..a0fc3e43a90 100644
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@@ -280,6 +280,7 @@ tf_cuda_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core:framework_lite",
+        "//tensorflow/core:gpu_runtime",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
@@ -293,6 +294,33 @@ tf_cuda_library(
     ]) + tf_custom_op_library_additional_deps(),
 )
 
+# Test target built from convert/convert_graph_test.cc. It depends on
+# :trt_conversion and, via if_tensorrt(), on the TensorRT nv_infer library.
+tf_cuda_cc_test(
+    name = "convert_graph_test",
+    size = "medium",
+    srcs = ["convert/convert_graph_test.cc"],
+    tags = [
+        "no_cuda_on_cpu_tap",
+        "no_windows",
+        "nomac",
+    ],
+    deps = [
+        ":trt_conversion",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:direct_session",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ] + if_tensorrt([
+        "@local_config_tensorrt//:nv_infer",
+    ]),
+)
+
 # Library for the segmenting portion of TensorRT operation creation
 cc_library(
     name = "segment",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 21ec8b0b30c..b019c99882b 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -31,6 +31,9 @@ limitations under the License.
#include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
 #include "tensorflow/contrib/tensorrt/segment/segment.h"
 #include "tensorflow/contrib/tensorrt/test/utils.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@@ -772,33 +775,71 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
     const ConversionParams& params, const EngineInfo& engine) {
+  // Returns {cuda_device_id, allocator} to use for `engine`. The pair stays
+  // at {-1, nullptr} when no usable GPU device is found.
   int cuda_device_id = -1;
   tensorflow::Allocator* dev_allocator = nullptr;
-  if (params.cluster) {
-    std::vector<tensorflow::Device*> devices;
-    if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
-      DeviceNameUtils::ParsedName parsed_name;
-      if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
-          parsed_name.has_id) {
-        params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
-                                                            &devices);
+  if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
+      engine.device.empty()) {
+    // If device is not set, use the first found GPU device for the conversion.
+    // TF GPU ids 0..99 are probed in increasing order.
+    for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
+      TfGpuId tf_gpu_id(tf_gpu_id_value);
+      CudaGpuId cuda_gpu_id;
+      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
+      if (s.ok()) {
+        VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
+                << cuda_gpu_id.value();
+        cuda_device_id = cuda_gpu_id.value();
+        GPUOptions gpu_options;
+        // If the TF to Cuda gpu id mapping exists, the device and corresponding
+        // allocator must have been initialized already, so the
+        // GetGPUAllocator() call won't create a new allocator.
+        dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
+            gpu_options, tf_gpu_id, 1);
+        break;
       }
+      // A miss is reported at verbose level only: this line runs once for
+      // every probed id without a mapping, i.e. up to 100 times per call.
+      VLOG(2) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
+              << s;
     }
-    if (!devices.empty()) {
-      if (devices.size() > 1) {
-        string msg = "Found multiple matching devices using name '";
-        StrAppend(&msg, engine.device, "': ");
-        for (auto d : devices) StrAppend(&msg, d->name(), ", ");
-        StrAppend(&msg, ". Will get the allocator from first one.");
-        LOG(WARNING) << msg;
-      }
-      tensorflow::AllocatorAttributes alloc_attr;
-      cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id;
-      dev_allocator = devices[0]->GetAllocator(alloc_attr);
-      VLOG(1) << "Using allocator " << dev_allocator->Name()
-              << " and cuda_device_id " << cuda_device_id;
-    } else {
-      LOG(WARNING) << "Cluster is set but device '" << engine.device
-                   << "' is not found in the cluster";
+    if (cuda_device_id < 0) {
+      LOG(ERROR) << "No TF GPU device found to use for the conversion";
+    }
+    return std::make_pair(cuda_device_id, dev_allocator);
+  }
+
+  // Use the device requested by the engine.
+  auto device_set = params.cluster->GetDeviceSet();
+  std::vector<tensorflow::Device*> devices;
+  DeviceNameUtils::ParsedName parsed_name;
+  if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
+      parsed_name.has_id) {
+    device_set->FindMatchingDevices(parsed_name, &devices);
+  }
+  if (!devices.empty()) {
+    if (devices.size() > 1) {
+      string msg = "Found multiple matching devices using name '";
+      StrAppend(&msg, engine.device, "': ");
+      for (auto d : devices) StrAppend(&msg, d->name(), ", ");
+      StrAppend(&msg, ". Will get the allocator from first one.");
+      LOG(WARNING) << msg;
     }
+    // tensorflow_gpu_device_info() may be null when the matched device is not
+    // a GPU (e.g. engine.device names a CPU), so check before dereferencing.
+    const auto* gpu_info = devices[0]->tensorflow_gpu_device_info();
+    if (gpu_info == nullptr) {
+      LOG(WARNING) << "Device '" << devices[0]->name()
+                   << "' has no GPU device info, cannot use it for conversion";
+      return std::make_pair(cuda_device_id, dev_allocator);
+    }
+    tensorflow::AllocatorAttributes alloc_attr;
+    cuda_device_id = gpu_info->gpu_id;
+    dev_allocator = devices[0]->GetAllocator(alloc_attr);
+    VLOG(1) << "Using allocator " << dev_allocator->Name()
+            << " and cuda_device_id " << cuda_device_id;
+  } else {
+    LOG(WARNING) << "Cluster is set but device '" << engine.device
+                 << "' is not found in the cluster";
   }
   return std::make_pair(cuda_device_id, dev_allocator);
 }
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h
index 9d986e48904..35252023698 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -84,6 +85,13 @@ std::vector<int> GetLinkedTensorRTVersion();
 
 // Return runtime time TensorRT library version information.
 std::vector<int> GetLoadedTensorRTVersion();
+
+// Helper method for the conversion, exposed for testing.
+// Returns the CUDA device id and the device allocator to use for `engine`,
+// or {-1, nullptr} when no usable GPU device is found.
+std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
+    const ConversionParams& params, const EngineInfo& engine);
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
new file mode 100644
index 00000000000..8146bed4b05
--- /dev/null
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
@@ -0,0 +1,140 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
+
+#include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
+#include "tensorflow/core/common_runtime/device_mgr.h"
+#include "tensorflow/core/common_runtime/device_set.h"
+#include "tensorflow/core/grappler/clusters/cluster.h"
+#include "tensorflow/core/grappler/grappler_item.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
+#include "tensorflow/core/public/session.h"
+
+#if GOOGLE_CUDA
+#if GOOGLE_TENSORRT
+
+namespace tensorflow {
+namespace tensorrt {
+namespace convert {
+
+class FakeCluster : public grappler::Cluster {
+ public:
+  FakeCluster() : Cluster(0) {}
+
+  void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }
+
+  const DeviceSet* GetDeviceSet() const override { return device_set_; }
+
+  string type() const override { return ""; }
+  Status Provision() override { return Status::OK(); }
+  Status Initialize(const grappler::GrapplerItem& item) override {
+    return Status::OK();
+  }
+  Status Run(const GraphDef& graph_def,
+             const std::vector<std::pair<string, Tensor>>& feed,
+             const std::vector<string>& fetch,
+             RunMetadata* metadata) override {
+    return Status::OK();
+  }
+
+ private:
+  const DeviceSet* device_set_ = nullptr;  // Null until SetDeviceSet() is called.
+};
+
+TEST(ConvertGraphTest, GetDeviceAndAllocator) {
+  ConversionParams params;
+  EngineInfo engine_info;
+  {
+    // params.cluster is not set, and no gpu device is available.
+    auto result = GetDeviceAndAllocator(params, engine_info);
+    EXPECT_EQ(-1, result.first);
+    EXPECT_EQ(nullptr, result.second);
+  }
+
+  // Create a session with two (virtual) gpu devices.
+  SessionOptions options;
+  ConfigProto* config = &options.config;
+  GPUOptions* gpu_options = config->mutable_gpu_options();
+  auto virtual_devices =
+      gpu_options->mutable_experimental()->add_virtual_devices();
+  virtual_devices->add_memory_limit_mb(200);
+  virtual_devices->add_memory_limit_mb(200);
+  std::unique_ptr<Session> session(NewSession(options));
+
+  {
+    // params.cluster is not set, should find and return first gpu id and
+    // corresponding allocator.
+    auto result = GetDeviceAndAllocator(params, engine_info);
+    EXPECT_EQ(0, result.first);
+    EXPECT_NE(nullptr, result.second);
+    EXPECT_EQ("GPU_0_bfc", result.second->Name());
+  }
+
+  FakeCluster cluster;
+  params.cluster = &cluster;
+  {
+    // params.cluster->GetDeviceSet() returns null, should find and return first
+    // gpu id and corresponding allocator.
+    auto result = GetDeviceAndAllocator(params, engine_info);
+    EXPECT_EQ(0, result.first);
+    EXPECT_NE(nullptr, result.second);
+    EXPECT_EQ("GPU_0_bfc", result.second->Name());
+  }
+
+  // Build the DeviceSet.
+  DeviceSet device_set;
+  const DeviceMgr* device_mgr = nullptr;
+  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
+  for (auto d : device_mgr->ListDevices()) {
+    device_set.AddDevice(d);
+  }
+  cluster.SetDeviceSet(&device_set);
+  {
+    // engine_info.device is not set, should find and return first gpu id and
+    // corresponding allocator.
+    auto result = GetDeviceAndAllocator(params, engine_info);
+    EXPECT_EQ(0, result.first);
+    EXPECT_NE(nullptr, result.second);
+    EXPECT_EQ("GPU_0_bfc", result.second->Name());
+  }
+
+  engine_info.device = "/GPU:1";
+  {
+    // Use the second (virtual) device; the expected cuda device id is still 0.
+    auto result = GetDeviceAndAllocator(params, engine_info);
+    EXPECT_EQ(0, result.first);
+    EXPECT_NE(nullptr, result.second);
+    EXPECT_EQ("GPU_1_bfc", result.second->Name());
+  }
+
+  engine_info.device = "/GPU:3";
+  {
+    // Set to use nonexistent device.
+    auto result = GetDeviceAndAllocator(params, engine_info);
+    EXPECT_EQ(-1, result.first);
+    EXPECT_EQ(nullptr, result.second);
+  }
+}
+
+}  // namespace convert
+}  // namespace tensorrt
+}  // namespace tensorflow
+
+#endif  // GOOGLE_TENSORRT
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 863074e773c..0f5abe68986 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -77,6 +77,10 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 
+// TODO(aaroey): put these constants into some class.
+const char* const kInputPHName = "TensorRTInputPH_";
+const char* const kOutputPHName = "TensorRTOutputPH_";
+
 namespace convert {
 using ::tensorflow::str_util::Split;
 using ::tensorflow::strings::StrAppend;
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index a60253740fe..9274027e632 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -36,8 +36,9 @@ limitations under the License.
 
 namespace tensorflow {
 namespace tensorrt {
-static const char* kInputPHName = "TensorRTInputPH_";
-static const char* kOutputPHName = "TensorRTOutputPH_";
+extern const char* const kInputPHName;
+extern const char* const kOutputPHName;
+
 namespace convert {
 
 struct EngineConnection {