[TF:TRT] TF-TRT should not split max_workspace_size among TRTEngineOps.

Because TF GPU executes all GPU compute operations on a single stream, we
shouldn't split the max_workspace_size among TRTEngineOps. This CL changes
the bridge to use the converter max_workspace_size for each TRTEngineOp
until we implement a more sophisticated heuristic.

PiperOrigin-RevId: 337245059
Change-Id: Id87b006c631a888f47025d1c01fbabb1e794c44b
This commit is contained in:
Bixia Zheng 2020-10-14 22:51:28 -07:00 committed by TensorFlower Gardener
parent ff2b597e36
commit 5452c25097

View File

@ -755,13 +755,10 @@ Status ConvertAfterShapes(const ConversionParams& params) {
// Get the EngineInfo for each segment. // Get the EngineInfo for each segment.
std::unordered_map<string, Node*> node_map; std::unordered_map<string, Node*> node_map;
TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
float total_num_nodes_in_segments = 0.;
std::vector<EngineInfo> engine_segments; std::vector<EngineInfo> engine_segments;
engine_segments.reserve(initial_segments.size()); engine_segments.reserve(initial_segments.size());
std::vector<Node*> reverse_topo_order; std::vector<Node*> reverse_topo_order;
GetPostOrder(graph, &reverse_topo_order); GetPostOrder(graph, &reverse_topo_order);
size_t total_engine_bytes_size = 0;
std::vector<size_t> engine_bytes_size;
segment::SegmentNodesVector converted_segments; segment::SegmentNodesVector converted_segments;
converted_segments.reserve(initial_segments.size()); converted_segments.reserve(initial_segments.size());
string engine_name_prefix = string engine_name_prefix =
@ -793,9 +790,6 @@ Status ConvertAfterShapes(const ConversionParams& params) {
continue; continue;
} }
engine_bytes_size.push_back(curr_engine.segment_graph_def.ByteSizeLong());
total_engine_bytes_size += engine_bytes_size.back();
total_num_nodes_in_segments += curr_segment.size();
engine_segments.push_back(std::move(curr_engine)); engine_segments.push_back(std::move(curr_engine));
converted_segments.push_back(std::move(curr_segment)); converted_segments.push_back(std::move(curr_segment));
@ -834,13 +828,9 @@ Status ConvertAfterShapes(const ConversionParams& params) {
engine_nodes.resize(engine_segments.size()); engine_nodes.resize(engine_segments.size());
for (int i = 0; i < engine_segments.size(); ++i) { for (int i = 0; i < engine_segments.size(); ++i) {
auto& engine = engine_segments.at(i); auto& engine = engine_segments.at(i);
// Partition the workspace size by the average of node ratio and segment // TODO(b/170762693): implement the heuristic to calculate
// graphdef size // max_workspace_size_bytes.
engine.max_workspace_size_bytes = engine.max_workspace_size_bytes = params.max_workspace_size_bytes;
params.max_workspace_size_bytes *
(engine_bytes_size.at(i) / total_engine_bytes_size +
converted_segments.at(i).size() / total_num_nodes_in_segments) /
2.0;
VLOG(1) << "Assigned " << engine.max_workspace_size_bytes << " bytes to " VLOG(1) << "Assigned " << engine.max_workspace_size_bytes << " bytes to "
<< engine.engine_name; << engine.engine_name;
auto status = CreateTRTNode(params, engine_segments, i, auto status = CreateTRTNode(params, engine_segments, i,