[TF:TRT] TF-TRT should not split max_workspace_size among TRTEngineOps.

Because TF GPU executes all GPU compute operations on a single stream, we
shouldn't split the max_workspace_size among TRTEngineOps. This CL changes
the bridge to use the converter's max_workspace_size for each TRTEngineOp
until we implement a more sophisticated heuristic.

PiperOrigin-RevId: 337245059
Change-Id: Id87b006c631a888f47025d1c01fbabb1e794c44b
This commit is contained in:
Bixia Zheng 2020-10-14 22:51:28 -07:00 committed by TensorFlower Gardener
parent ff2b597e36
commit 5452c25097

View File

@ -755,13 +755,10 @@ Status ConvertAfterShapes(const ConversionParams& params) {
// Get the EngineInfo for each segment.
std::unordered_map<string, Node*> node_map;
TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
float total_num_nodes_in_segments = 0.;
std::vector<EngineInfo> engine_segments;
engine_segments.reserve(initial_segments.size());
std::vector<Node*> reverse_topo_order;
GetPostOrder(graph, &reverse_topo_order);
size_t total_engine_bytes_size = 0;
std::vector<size_t> engine_bytes_size;
segment::SegmentNodesVector converted_segments;
converted_segments.reserve(initial_segments.size());
string engine_name_prefix =
@ -793,9 +790,6 @@ Status ConvertAfterShapes(const ConversionParams& params) {
continue;
}
engine_bytes_size.push_back(curr_engine.segment_graph_def.ByteSizeLong());
total_engine_bytes_size += engine_bytes_size.back();
total_num_nodes_in_segments += curr_segment.size();
engine_segments.push_back(std::move(curr_engine));
converted_segments.push_back(std::move(curr_segment));
@ -834,13 +828,9 @@ Status ConvertAfterShapes(const ConversionParams& params) {
engine_nodes.resize(engine_segments.size());
for (int i = 0; i < engine_segments.size(); ++i) {
auto& engine = engine_segments.at(i);
// Partition the workspace size by the average of node ratio and segment
// graphdef size
engine.max_workspace_size_bytes =
params.max_workspace_size_bytes *
(engine_bytes_size.at(i) / total_engine_bytes_size +
converted_segments.at(i).size() / total_num_nodes_in_segments) /
2.0;
// TODO(b/170762693): implement the heuristic to calculate
// max_workspace_size_bytes.
engine.max_workspace_size_bytes = params.max_workspace_size_bytes;
VLOG(1) << "Assigned " << engine.max_workspace_size_bytes << " bytes to "
<< engine.engine_name;
auto status = CreateTRTNode(params, engine_segments, i,