Merge pull request #21508 from aaroey:fix_allocator_when_engine_device_not_specified

PiperOrigin-RevId: 209647689
2018-08-21 13:20:25 -07:00 · 2018-08-21 13:20:25 -07:00 · 7cc6abab4e
commit 7cc6abab4e
parent a7e961ac88 4684421d9a
6 changed files with 228 additions and 26 deletions
--- a/tensorflow/contrib/tensorrt/BUILD
+++ b/tensorflow/contrib/tensorrt/BUILD
@ -280,6 +280,7 @@ tf_cuda_library(
        "//tensorflow/core/grappler:grappler_item",
        "//tensorflow/core/grappler:utils",
        "//tensorflow/core:framework_lite",
        "//tensorflow/core:gpu_runtime",
        "//tensorflow/core:graph",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
@ -293,6 +294,31 @@ tf_cuda_library(
    ]) + tf_custom_op_library_additional_deps(),
 )
 # Test target for convert/convert_graph_test.cc. It depends on
 # :trt_conversion (the code under test) and on direct_session, which the test
 # uses to create a session. The TensorRT nv_infer dependency is only added
 # through if_tensorrt().
 tf_cuda_cc_test(
    name = "convert_graph_test",
    size = "medium",
    srcs = ["convert/convert_graph_test.cc"],
    # Tags restricting where the test is scheduled (per their names: not on
    # the CPU-only TAP configuration, not on Windows, not on Mac).
    tags = [
        "no_cuda_on_cpu_tap",
        "no_windows",
        "nomac",
    ],
    deps = [
        ":trt_conversion",
        "//tensorflow/core/grappler:grappler_item",
        "//tensorflow/core/grappler/clusters:cluster",
        "//tensorflow/core:core_cpu",
        "//tensorflow/core:core_cpu_base",
        "//tensorflow/core:direct_session",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
    ] + if_tensorrt([
        "@local_config_tensorrt//:nv_infer",
    ]),
 )
 # Library for the segmenting portion of TensorRT operation creation
 cc_library(
    name = "segment",
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@ -31,6 +31,9 @@ limitations under the License.
 #include "tensorflow/contrib/tensorrt/resources/trt_resources.h"
 #include "tensorflow/contrib/tensorrt/segment/segment.h"
 #include "tensorflow/contrib/tensorrt/test/utils.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def_builder.h"
@ -772,15 +775,38 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
    const ConversionParams& params, const EngineInfo& engine) {
  int cuda_device_id = -1;
  tensorflow::Allocator* dev_allocator = nullptr;
-  if (params.cluster) {
+  if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr ||
      engine.device.empty()) {
    // If device is not set, use the first found GPU device for the conversion.
    for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) {
      TfGpuId tf_gpu_id(tf_gpu_id_value);
      CudaGpuId cuda_gpu_id;
      Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id);
      if (s.ok()) {
        VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device "
                << cuda_gpu_id.value();
        cuda_device_id = cuda_gpu_id.value();
        GPUOptions gpu_options;
        // If the TF to Cuda gpu id mapping exist, the device and corresponding
        // allocator must have been initialized already, so the
        // GetGPUAllocator() call won't create a new allocator.
        dev_allocator = GPUProcessState::singleton()->GetGPUAllocator(
            gpu_options, tf_gpu_id, 1);
        break;
      }
      LOG(ERROR) << "TF GPU with id " << tf_gpu_id_value << " does not exist "
                 << s;
    }
    return std::make_pair(cuda_device_id, dev_allocator);
  }
  // Use the device requested by the engine.
  auto device_set = params.cluster->GetDeviceSet();
  std::vector<tensorflow::Device*> devices;
    if (!engine.device.empty() && params.cluster->GetDeviceSet()) {
  DeviceNameUtils::ParsedName parsed_name;
  if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) &&
      parsed_name.has_id) {
-        params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name,
+    device_set->FindMatchingDevices(parsed_name, &devices);
                                                            &devices);
      }
  }
  if (!devices.empty()) {
    if (devices.size() > 1) {
@ -799,7 +825,6 @@ std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
    LOG(WARNING) << "Cluster is set but device '" << engine.device
                 << "' is not found in the cluster";
  }
  }
  return std::make_pair(cuda_device_id, dev_allocator);
 }
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h
@ -17,6 +17,7 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/graph_properties.h"
@ -84,6 +85,11 @@ std::vector<int> GetLinkedTensorRTVersion();
 // Returns version information for the TensorRT library loaded at runtime.
 std::vector<int> GetLoadedTensorRTVersion();
 // Helper for the conversion, exposed for testing.
 // Returns a pair of (CUDA device id, device allocator) to use for `engine`.
 // The pair is (-1, nullptr) when no suitable device is found.
 std::pair<int, tensorflow::Allocator*> GetDeviceAndAllocator(
    const ConversionParams& params, const EngineInfo& engine);
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
--- a/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph_test.cc
@ -0,0 +1,140 @@
 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/contrib/tensorrt/convert/convert_graph.h"
 #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/protobuf/config.pb.h"  // NOLINT
 #include "tensorflow/core/public/session.h"
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
 namespace tensorflow {
 namespace tensorrt {
 namespace convert {
 // Minimal grappler::Cluster stub for tests: the DeviceSet it reports can be
 // injected, and every cluster operation is a no-op that returns OK.
 class FakeCluster : public grappler::Cluster {
 public:
  FakeCluster() : Cluster(0) {}
  // Sets the DeviceSet reported by GetDeviceSet(). The pointer is stored as-is;
  // this class never deletes it.
  void SetDeviceSet(const DeviceSet* device_set) { device_set_ = device_set; }
  // Returns the injected DeviceSet, or nullptr if SetDeviceSet() has not been
  // called yet.
  const DeviceSet* GetDeviceSet() const override { return device_set_; }
  string type() const override { return ""; }
  Status Provision() override { return Status::OK(); }
  Status Initialize(const grappler::GrapplerItem& item) override {
    return Status::OK();
  }
  Status Run(const GraphDef& graph_def,
             const std::vector<std::pair<string, Tensor>>& feed,
             const std::vector<string>& fetch,
             RunMetadata* metadata) override {
    return Status::OK();
  }
 private:
  // Must start out null: callers rely on GetDeviceSet() returning nullptr for
  // a freshly constructed cluster. Without the initializer the pointer would
  // be indeterminate and reading it would be undefined behavior.
  const DeviceSet* device_set_ = nullptr;
 };
 // Exercises GetDeviceAndAllocator() through its fallback paths (no cluster,
 // no DeviceSet, no engine device) and its explicit-device paths. The steps
 // are order-dependent: each one mutates `params`, `cluster` or `engine_info`
 // and builds on the session created earlier.
 TEST(ConvertGraphTest, GetDeviceAndAllocator) {
  ConversionParams params;
  EngineInfo engine_info;
  {
    // params.cluster is not set, and no gpu device is available.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
  // Create a session with two (virtual) gpu device.
  SessionOptions options;
  ConfigProto* config = &options.config;
  GPUOptions* gpu_options = config->mutable_gpu_options();
  auto virtual_devices =
      gpu_options->mutable_experimental()->add_virtual_devices();
  virtual_devices->add_memory_limit_mb(200);
  virtual_devices->add_memory_limit_mb(200);
  std::unique_ptr<Session> session(NewSession(options));
  // The session is dereferenced below; stop here with a test failure instead
  // of crashing if it could not be created.
  ASSERT_TRUE(session != nullptr);
  {
    // params.cluster is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }
  FakeCluster cluster;
  params.cluster = &cluster;
  {
    // params.cluster->GetDeviceSet() returns null, should find and return first
    // gpu id and corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }
  // Build the DeviceSet.
  DeviceSet device_set;
  const DeviceMgr* device_mgr = nullptr;
  TF_ASSERT_OK(session->LocalDeviceManager(&device_mgr));
  for (auto d : device_mgr->ListDevices()) {
    device_set.AddDevice(d);
  }
  cluster.SetDeviceSet(&device_set);
  {
    // engine_info.device is not set, should find and return first gpu id and
    // corresponding allocator.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_0_bfc", result.second->Name());
  }
  engine_info.device = "/GPU:1";
  {
    // Set to use second device.
    // NOTE(review): the expected cuda device id stays 0 while the allocator
    // name changes; presumably both virtual devices configured above live on
    // the same physical GPU -- confirm against the virtual device docs.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(0, result.first);
    EXPECT_NE(nullptr, result.second);
    EXPECT_EQ("GPU_1_bfc", result.second->Name());
  }
  engine_info.device = "/GPU:3";
  {
    // Set to use nonexistent device.
    auto result = GetDeviceAndAllocator(params, engine_info);
    EXPECT_EQ(-1, result.first);
    EXPECT_EQ(nullptr, result.second);
  }
 }
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
 #endif  // GOOGLE_TENSORRT
 #endif  // GOOGLE_CUDA
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@ -77,6 +77,10 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
 // TODO(aaroey): put these constants into some class.
 // Definitions of the name-prefix constants that convert_nodes.h declares as
 // `extern const char* const`.
 // NOTE(review): judging by their names and values these are prefixes for the
 // TensorRT input/output placeholders -- confirm at the use sites.
 const char* const kInputPHName = "TensorRTInputPH_";
 const char* const kOutputPHName = "TensorRTOutputPH_";
 namespace convert {
 using ::tensorflow::str_util::Split;
 using ::tensorflow::strings::StrAppend;
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@ -36,8 +36,9 @@ limitations under the License.
 namespace tensorflow {
 namespace tensorrt {
-static const char* kInputPHName = "TensorRTInputPH_";
+extern const char* const kInputPHName;
-static const char* kOutputPHName = "TensorRTOutputPH_";
+extern const char* const kOutputPHName;
 namespace convert {
 struct EngineConnection {