[TF:TRT] TF-TRT should not split max_workspace_size among TRTEngineOps.

Because TF GPU executes all GPU compute operations on a single stream, we shouldn't split the max_workspace_size among TRTEngineOps. This CL changes the bridge to use the converter's max_workspace_size for each TRTEngineOp until we implement a more sophisticated heuristic. PiperOrigin-RevId: 337245059 Change-Id: Id87b006c631a888f47025d1c01fbabb1e794c44b
2020-10-14 22:51:28 -07:00 · 2020-10-14 22:51:28 -07:00 · 5452c25097
commit 5452c25097
parent ff2b597e36
1 changed files with 3 additions and 13 deletions
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
@ -755,13 +755,10 @@ Status ConvertAfterShapes(const ConversionParams& params) {
  // Get the EngineInfo for each segment.
  std::unordered_map<string, Node*> node_map;
  TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
  float total_num_nodes_in_segments = 0.;
  std::vector<EngineInfo> engine_segments;
  engine_segments.reserve(initial_segments.size());
  std::vector<Node*> reverse_topo_order;
  GetPostOrder(graph, &reverse_topo_order);
  size_t total_engine_bytes_size = 0;
  std::vector<size_t> engine_bytes_size;
  segment::SegmentNodesVector converted_segments;
  converted_segments.reserve(initial_segments.size());
  string engine_name_prefix =
@ -793,9 +790,6 @@ Status ConvertAfterShapes(const ConversionParams& params) {
      continue;
    }
    engine_bytes_size.push_back(curr_engine.segment_graph_def.ByteSizeLong());
    total_engine_bytes_size += engine_bytes_size.back();
    total_num_nodes_in_segments += curr_segment.size();
    engine_segments.push_back(std::move(curr_engine));
    converted_segments.push_back(std::move(curr_segment));
@ -834,13 +828,9 @@ Status ConvertAfterShapes(const ConversionParams& params) {
  engine_nodes.resize(engine_segments.size());
  for (int i = 0; i < engine_segments.size(); ++i) {
    auto& engine = engine_segments.at(i);
-    // Partition the workspace size by the average of node ratio and segment
+    // TODO(b/170762693): implement the heuristic to calculate
-    // graphdef size
+    // max_workspace_size_bytes.
-    engine.max_workspace_size_bytes =
+    engine.max_workspace_size_bytes = params.max_workspace_size_bytes;
        params.max_workspace_size_bytes *
        (engine_bytes_size.at(i) / total_engine_bytes_size +
         converted_segments.at(i).size() / total_num_nodes_in_segments) /
        2.0;
    VLOG(1) << "Assigned " << engine.max_workspace_size_bytes << " bytes to "
            << engine.engine_name;
    auto status = CreateTRTNode(params, engine_segments, i,