diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index b3c604f5f8c..1e9524c26b3 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -33,7 +33,7 @@ trt_gdef = trt.CreateInferenceGraph(
     gdef, #original graph_def
     ["output"], #name of output node(s)
     max_batch_size, #maximum batch size to run the inference
-    max_workspace_size) # max memory for TensorRT to use
+    max_workspace_size_bytes) # max memory for TensorRT to use
 tf.reset_default_graph()
 tf.import_graph_def(graph_def=trt_gdef)
 #...... run inference
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index e0f38c60ee7..81fdf012867 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -123,7 +123,7 @@ tensorflow::Status ConvertSubGraphToTensorRT(
     const std::set<int>& subgraph_node_ids,
     size_t max_batch_size,  // max batch size that engine will be created for
     // max amount of memory that engine will be allowed to consume, in bytes
-    size_t max_workspace_size,
+    size_t max_workspace_size_bytes,
     const tensorflow::grappler::GraphProperties& graph_properties,
     tensorflow::Graph* graph) {
   tensorflow::EdgeSet subgraph_incoming_edges;
@@ -159,7 +159,8 @@ tensorflow::Status ConvertSubGraphToTensorRT(
   tensorflow::NodeDef trt_node_def;
   TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(
       *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs,
-      max_batch_size, max_workspace_size, graph_properties, &trt_node_def));
+      max_batch_size, max_workspace_size_bytes, graph_properties,
+      &trt_node_def));
 
   tensorflow::Status status;
   tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status);
@@ -205,7 +206,7 @@ tensorflow::Status BuildNodeMap(
 tensorflow::Status ConvertGraphDefToTensorRT(
     const tensorflow::GraphDef& graph_def,
     const std::vector<string>& output_names, size_t max_batch_size,
-    size_t max_workspace_size, tensorflow::GraphDef* new_graph_def) {
+    size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) {
   // optimization pass
   tensorflow::grappler::GrapplerItem item;
   item.fetch = output_names;
@@ -258,7 +259,7 @@ tensorflow::Status ConvertGraphDefToTensorRT(
   TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph(
       gdef, IsTensorRTCandidate, segment_options, &segments));
   if (segments.size() > 1) {
-    VLOG(INFO) << "MULTIPLE tensorrt candidate conversion: " << segments.size();
+    VLOG(0) << "MULTIPLE tensorrt candidate conversion: " << segments.size();
   }
   std::unordered_map<string, tensorflow::Node*> node_map;
   TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
@@ -268,8 +269,8 @@ tensorflow::Status ConvertGraphDefToTensorRT(
       subgraph_node_ids.insert(node_map.at(node_name)->id());
     }
     TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT(
-        output_names, subgraph_node_ids, max_batch_size, max_workspace_size,
-        static_graph_properties, &graph));
+        output_names, subgraph_node_ids, max_batch_size,
+        max_workspace_size_bytes, static_graph_properties, &graph));
   }
   graph.ToGraphDef(new_graph_def);
   return tensorflow::Status::OK();
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 2653c1c6365..c84684d4858 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -103,9 +103,9 @@ static std::vector<std::pair<int, int>> createSamePadding(
     int left = p / 2;
     int right = p - left;
 
-    VLOG(-1) << "PADDING_" << i << " pre: " << left << ", post: " << right
-             << "paras: " << inputDims[i] << ", " << stride.d[i] << ", "
-             << "kernel: " << kernel.d[i];
+    VLOG(2) << "PADDING_" << i << " pre: " << left << ", post: " << right
+            << "paras: " << inputDims[i] << ", " << stride.d[i] << ", "
+            << "kernel: " << kernel.d[i];
     padding[i] = {left, right};
   }
   return padding;
@@ -352,7 +352,7 @@ class Converter {
       tensorflow::NodeDef const& node_def) {
     std::vector<TRT_TensorOrWeights> inputs;
     for (auto const& input_name : node_def.input()) {
-      VLOG(-1) << "retrieve input: " << input_name;
+      VLOG(2) << "retrieve input: " << input_name;
       inputs.push_back(_trt_tensors.at(input_name));
     }
     return inputs;
   }
@@ -395,7 +395,7 @@ class Converter {
       if (output.is_tensor()) {
         output.tensor()->setName(output_name.c_str());
       }
-      VLOG(-1) << "write out tensor: " << output_name;
+      VLOG(2) << "write out tensor: " << output_name;
       if (!_trt_tensors.insert({output_name, output}).second) {
         return tensorflow::errors::AlreadyExists(
             "output tensor already exists for op: " + op);
@@ -456,13 +456,13 @@ struct LambdaFactory {
   std::function<T(T)> unary() {
     switch (op) {
       case OP_CATEGORY::RSQRT: {
-        VLOG(-1) << "RSQRT GETS DONE";
+        VLOG(2) << "RSQRT GETS DONE";
         return [](T t) -> T { return 1.0 / std::sqrt(t); };
       }
       case OP_CATEGORY::NEG:
        return [](T t) -> T { return -t; };
      default:
-        VLOG(-1) << "not supported op for unary: " << static_cast<int>(op);
+        VLOG(2) << "not supported op for unary: " << static_cast<int>(op);
        return nullptr;
     }
   }
@@ -487,22 +487,22 @@
   template <typename T>
   std::function<T(T)> broadcast_r(T val) {
-    VLOG(-1) << "LAMBDA VAL : " << val;
+    VLOG(2) << "LAMBDA VAL : " << val;
     switch (op) {
       case OP_CATEGORY::ADD:
         return [val](T l) -> T {
-          VLOG(-1) << "LAMBDA VAL : " << val;
+          VLOG(2) << "LAMBDA VAL : " << val;
           return l + val;
         };
         // return [val](T l)-> T {return l+val;};
       case OP_CATEGORY::SUB:
         return [val](T l) -> T {
-          VLOG(-1) << "LAMBDA VAL : " << val;
+          VLOG(2) << "LAMBDA VAL : " << val;
           return l - val;
         };
       case OP_CATEGORY::MUL:
         return [val](T l) -> T {
-          VLOG(-1) << "LAMBDA VAL : " << val;
+          VLOG(2) << "LAMBDA VAL : " << val;
           return l * val;
         };
       default:
@@ -516,21 +516,21 @@
   template <typename T>
   std::function<T(T)> broadcast_l(T val) {
-    VLOG(-1) << "LAMBDA VAL : " << val;
+    VLOG(2) << "LAMBDA VAL : " << val;
     switch (op) {
       case OP_CATEGORY::ADD:
         return [val](T l) -> T {
-          VLOG(-1) << "LAMBDA VAL : " << val;
+          VLOG(2) << "LAMBDA VAL : " << val;
           return val + l;
         };
       case OP_CATEGORY::SUB:
         return [val](T l) -> T {
-          VLOG(-1) << "LAMBDA VAL : " << val;
+          VLOG(2) << "LAMBDA VAL : " << val;
           return val - l;
         };
       case OP_CATEGORY::MUL:
         return [val](T l) -> T {
-          VLOG(-1) << "LAMBDA VAL : " << val;
+          VLOG(2) << "LAMBDA VAL : " << val;
           return val * l;
         };
       default:
@@ -570,7 +570,7 @@ tensorflow::Status BinaryCompute(TRT_ShapedWeights const& iweights_l,
   // assume iweights_l.type == iweight_r.type
   CHECK_EQ(iweights_l.type_, oweights->type_);
   CHECK_EQ(iweights_r.type_, oweights->type_);
-  VLOG(-1) << "SANITY CHECK!";
+  VLOG(2) << "SANITY CHECK!";
 
   switch (iweights_l.type_) {
     case tensorflow::DataType::DT_FLOAT: {
@@ -581,11 +581,11 @@
       if (iweights_l.count() != iweights_r.count()) {
         // we only supports broadcast of RankZero
         if (iweights_l.count() == 1) {
-          VLOG(-1) << "I bet it is not working!" << (*inp_l);
+          VLOG(2) << "I bet it is not working!" << (*inp_l);
           std::transform(inp_r, inp_r + iweights_r.count(), oup,
                          binary_op.broadcast_l(*inp_l));
         } else if (iweights_r.count() == 1) {
-          VLOG(-1) << "I bet it is not working!"
-                   << (*inp_r);
+          VLOG(2) << "I bet it is not working!" << (*inp_r);
           std::transform(inp_l, inp_l + iweights_l.count(), oup,
                          binary_op.broadcast_r(*inp_r));
         } else {
@@ -660,8 +660,8 @@ tensorflow::Status ConstantFoldBinary(
   int nbDims = weights_input_l.shape_.nbDims;
   nvinfer1::Dims output_shape;
   output_shape.nbDims = nbDims;
-  VLOG(-1) << "nbDims: " << nbDims
-           << "the other: " << weights_input_r.shape_.nbDims;
+  VLOG(2) << "nbDims: " << nbDims
+          << "the other: " << weights_input_r.shape_.nbDims;
   for (int i = 0; i < nbDims; i++) {
     if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) {
       output_shape.d[i] = weights_input_l.shape_.d[i];
@@ -673,9 +673,9 @@
       return tensorflow::errors::Unimplemented(
           "Binary op with incompatible shape at, " + node_def.op());
     }
-    VLOG(-1) << "left: " << weights_input_l.shape_.d[i]
-             << "right: " << weights_input_r.shape_.d[i]
-             << "output: " << output_shape.d[i];
+    VLOG(2) << "left: " << weights_input_l.shape_.d[i]
+            << "right: " << weights_input_r.shape_.d[i]
+            << "output: " << output_shape.d[i];
   }
 
   // FIXME assume type matches input weights
@@ -735,7 +735,7 @@ tensorflow::Status BinaryTensorOpWeight(
   auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
 
   if (weights.count() == 1) {
-    VLOG(-1) << "UNIFORM";
+    VLOG(2) << "UNIFORM";
     scale_mode = nvinfer1::ScaleMode::kUNIFORM;
   } else {
     // no broadcasting on Batch dimension;
@@ -838,7 +838,7 @@ tensorflow::Status ConvertPlaceholder(
     Converter& ctx, tensorflow::NodeDef const& node_def,
     std::vector<TRT_TensorOrWeights> const& inputs,
     std::vector<TRT_TensorOrWeights>* outputs) {
-  VLOG(-1) << "Placeholder should have been replace already";
+  VLOG(2) << "Placeholder should have been replace already";
   return tensorflow::errors::Unimplemented("cannot convert Placeholder op");
   // OK this make sense since we are supposed to replace it with input
   TFAttrs attrs(node_def);
@@ -905,12 +905,12 @@ tensorflow::Status ConvertConv2D(Converter& ctx,
   if (padding[0].first != padding[0].second ||
       padding[1].first != padding[1].second) {
     // TODO(jie): handle asymmetric padding
-    VLOG(-1) << "padding!!!: " << padding[0].first << padding[0].second
-             << padding[1].first << padding[1].second;
+    VLOG(2) << "padding!!!: " << padding[0].first << padding[0].second
+            << padding[1].first << padding[1].second;
 
     auto dim_before = tensor->getDimensions();
-    VLOG(-1) << "TENSOR before: " << dim_before.d[0] << ", " << dim_before.d[1]
-             << dim_before.d[2] << ", " << dim_before.d[3];
+    VLOG(2) << "TENSOR before: " << dim_before.d[0] << ", " << dim_before.d[1]
+            << dim_before.d[2] << ", " << dim_before.d[3];
     auto padLayer = ctx.network()->addPadding(
         *const_cast<nvinfer1::ITensor*>(tensor),
         nvinfer1::DimsHW(padding[0].first, padding[1].first),
@@ -918,8 +918,8 @@ tensorflow::Status ConvertConv2D(Converter& ctx,
     padding = {{0, 0}, {0, 0}};
     tensor = padLayer->getOutput(0);
     auto dim_after = tensor->getDimensions();
-    VLOG(-1) << "TENSOR after: " << dim_after.d[0] << ", " << dim_after.d[1]
-             << dim_after.d[2] << ", " << dim_after.d[3];
+    VLOG(2) << "TENSOR after: " << dim_after.d[0] << ", " << dim_after.d[1]
+            << dim_after.d[2] << ", " << dim_after.d[3];
   }
 
   nvinfer1::IConvolutionLayer* layer =
@@ -932,14 +932,14 @@ tensorflow::Status ConvertConv2D(Converter& ctx,
   nvinfer1::ITensor* output_tensor = layer->getOutput(0);
 
   auto dim_after = output_tensor->getDimensions();
-  VLOG(-1) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1]
-           << dim_after.d[2] << ", " << dim_after.d[3];
+  VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1]
+          << dim_after.d[2] << ", " << dim_after.d[3];
" << dim_after.d[3]; if (data_format == "NHWC") { // TODO(jie): transpose it back! output_tensor = ctx.transposeTensor(output_tensor, {0, 2, 3, 1}); } else { - VLOG(-1) << "NCHW !!!!"; + VLOG(2) << "NCHW !!!!"; } outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -961,7 +961,7 @@ tensorflow::Status ConvertPool(Converter& ctx, tensor = ctx.transposeTensor(const_cast(tensor), {0, 3, 1, 2}); } else { - VLOG(-1) << "NCHW !!!!"; + VLOG(2) << "NCHW !!!!"; } nvinfer1::PoolingType type; // TODO(jie): support other pooling type @@ -989,7 +989,7 @@ tensorflow::Status ConvertPool(Converter& ctx, {static_cast(tensor_dim.d[1]), static_cast(tensor_dim.d[2])}); } else if (attrs.get("padding") == "VALID") { // No padding for valid padding here - VLOG(-1) << "no padding added for VALID padding in pool" << node_def.name(); + VLOG(2) << "no padding added for VALID padding in pool" << node_def.name(); padding = {{0, 0}, {0, 0}}; } else { return tensorflow::errors::Unimplemented( @@ -999,8 +999,8 @@ tensorflow::Status ConvertPool(Converter& ctx, if (padding[0].first != padding[0].second || padding[1].first != padding[1].second) { // TODO(jie): handle asymmetric padding - VLOG(-1) << "padding!!!: " << padding[0].first << padding[0].second - << padding[1].first << padding[1].second; + VLOG(2) << "padding!!!: " << padding[0].first << padding[0].second + << padding[1].first << padding[1].second; auto padLayer = ctx.network()->addPadding( *const_cast(tensor), nvinfer1::DimsHW(padding[0].first, padding[1].first), @@ -1021,7 +1021,7 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): transpose it back! output_tensor = ctx.transposeTensor(output_tensor, {0, 2, 3, 1}); } else { - VLOG(-1) << "NCHW !!!!"; + VLOG(2) << "NCHW !!!!"; } outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -1063,7 +1063,7 @@ tensorflow::Status ConvertScale(Converter& ctx, {0, 3, 1, 2}); // TODO(jie): transpose it } else { - VLOG(-1) << "NCHW !!!!"; + VLOG(2) << "NCHW !!!!"; } nvinfer1::IScaleLayer* layer = ctx.network()->addScale( *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, @@ -1074,7 +1074,7 @@ tensorflow::Status ConvertScale(Converter& ctx, // TODO(jie): transpose it back! output_tensor = ctx.transposeTensor(output_tensor, {0, 2, 3, 1}); } else { - VLOG(-1) << "NCHW !!!!"; + VLOG(2) << "NCHW !!!!"; } outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -1099,14 +1099,14 @@ tensorflow::Status ConvertConst(Converter& ctx, TRT_ShapedWeights weights(dtype); if (!weights_tensor.float_val().empty()) { - VLOG(-1) << "SCALAR!!!" << node_def.name(); + VLOG(2) << "SCALAR!!!" << node_def.name(); nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { - VLOG(-1) << "dimensions: " << tensor.dims(); + VLOG(2) << "dimensions: " << tensor.dims(); weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), get_tensor_shape(tensor)); } else { - VLOG(-1) << "dimensions: " << tensor.dims(); + VLOG(2) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; scalar_shape.d[0] = 1; scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; @@ -1118,7 +1118,7 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { - VLOG(-1) << "TENSOR!!!" << node_def.name(); + VLOG(2) << "TENSOR!!!" 
     weights = TRT_ShapedWeights(dtype, weights_tensor.tensor_content().data(),
                                 get_tensor_shape(tensor));
   } else {
@@ -1403,7 +1403,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids,
     const std::vector<std::pair<int, int>>& input_inds,
     const std::vector<std::pair<int, int>>& output_inds, size_t max_batch_size,
-    size_t max_workspace_size,
+    size_t max_workspace_size_bytes,
     const tensorflow::grappler::GraphProperties& graph_properties,
     tensorflow::NodeDef* trt_node) {
   // Visit nodes in reverse topological order and construct the TRT network.
@@ -1466,18 +1466,18 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
     TF_CHECK_OK(convert_dtype(tf_dtype, &dtype));
 
-    VLOG(-1) << "accessing output index of: " << std::to_string(output_idx)
-             << ", at node: " << node_name
-             << "with output entry from shape_map: "
-             << std::to_string(op_info_vec.size());
+    VLOG(2) << "accessing output index of: " << std::to_string(output_idx)
+            << ", at node: " << node_name
+            << "with output entry from shape_map: "
+            << std::to_string(op_info_vec.size());
 
     // TODO(ben,jie): update TRT input format/dimension
     nvinfer1::DimsCHW input_dim_psuedo_chw;
     for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1;
 
     for (int i = 1; i < op_info.shape().dim_size(); i++) {
-      VLOG(-1) << "dimension: " << i
-               << " , size: " << op_info.shape().dim(i).size();
+      VLOG(2) << "dimension: " << i
+              << " , size: " << op_info.shape().dim(i).size();
       input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size();
     }
@@ -1492,23 +1492,22 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     if (!input_tensor)
       return tensorflow::errors::InvalidArgument(
           "Failed to create Input layer");
-    VLOG(-1) << "input tensor name :" << input_tensor_name;
+    VLOG(2) << "input tensor name :" << input_tensor_name;
 
     if (!converter.insert_input_tensor(input_tensor_name, input_tensor))
       return tensorflow::errors::AlreadyExists(
           "output tensor already exists for op: " + input_tensor_name);
   }
 
-  VLOG(-1) << "finished sorting";
+  VLOG(2) << "finished sorting";
 
   for (const tensorflow::Node* node : order) {
     tensorflow::NodeDef const& node_def = node->def();
-    VLOG(-1) << "converting node: " << node_def.name() << " , "
-             << node_def.op();
+    VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op();
     TF_RETURN_IF_ERROR(converter.convert_node(node_def));
   }
 
-  VLOG(-1) << "finished conversion";
+  VLOG(2) << "finished conversion";
 
   // Gather output metadata
   std::vector<string> output_names;
@@ -1521,7 +1520,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     std::string tensor_name = op_name;
     if (output_idx != 0)
       tensor_name = tensor_name + ":" + std::to_string(output_idx);
-    VLOG(-1) << "output tensor name: " << tensor_name;
+    VLOG(2) << "output tensor name: " << tensor_name;
     output_names.push_back(tensor_name);
     auto tensor_or_weights = converter.get_tensor(tensor_name);
     if (!tensor_or_weights.is_tensor()) {
@@ -1541,28 +1540,28 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     tensor->setType(trt_dtype);
   }
 
-  VLOG(-1) << "finished output";
+  VLOG(2) << "finished output";
 
   static int static_id = 0;
   // Build the engine
   trt_builder->setMaxBatchSize(max_batch_size);
-  trt_builder->setMaxWorkspaceSize(max_workspace_size);
-  LOG(INFO) << "starting build engine " << static_id;
+  trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes);
+  VLOG(0) << "starting build engine " << static_id;
   // TODO(ben,jie): half2 and int8 mode support
   std::string engine_plan_string;
   {
     auto trt_engine =
         infer_object(trt_builder->buildCudaEngine(*converter.network()));
-    LOG(INFO) << "built network";
+    VLOG(0) << "built network";
     auto engine_plan = infer_object(trt_engine->serialize());
-    VLOG(INFO) << "serialized engine";
+    VLOG(0) << "serialized engine";
     const char* engine_plan_data =
         static_cast<const char*>(engine_plan->data());
     engine_plan_string = std::move(
         std::string(engine_plan_data, engine_plan_data + engine_plan->size()));
   }
 
-  VLOG(INFO) << "finished engine";
+  VLOG(0) << "finished engine";
 
   // Build the TRT op
   // TODO(sami,ben,jie): proper naming!
@@ -1581,7 +1580,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
                                income_edges);
   op_builder.Input(input_list);
 
-  VLOG(INFO) << "finished op preparation";
+  VLOG(0) << "finished op preparation";
 
   auto status = op_builder.Attr("serialized_engine", engine_plan_string)
                     .Attr("input_nodes", input_names)
@@ -1589,8 +1588,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
                     .Attr("OutT", output_dtypes)
                     .Finalize(trt_node);
 
-  VLOG(INFO) << status.ToString();
-  VLOG(INFO) << "finished op building";
+  VLOG(0) << status.ToString() << " finished op building";
 
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index 2e7fd19566e..82b12b74ea5 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -38,7 +38,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
         input_inds,  // {node_id, output_idx}
     const std::vector<std::pair<int, int>>&
         output_inds,  // {node_id, output_idx}
-    size_t max_batch_size, size_t max_workspace_size,
+    size_t max_batch_size, size_t max_workspace_size_bytes,
     const tensorflow::grappler::GraphProperties& graph_prop,
     tensorflow::NodeDef* trt_node);
diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc
index 5131c80794f..7add8cb8b3d 100644
--- a/tensorflow/contrib/tensorrt/log/trt_logger.cc
+++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc
@@ -27,7 +27,7 @@ void Logger::log(Severity severity, const char* msg) {
   // Suppress info-level messages
   switch (severity) {
     case Severity::kINFO: {  // Mark TRT info messages as debug!
-      VLOG(-1) << msg;
+      VLOG(2) << msg;
       break;
     }
     case Severity::kWARNING: {
diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py
index 6bdc20ed04f..b17b07e296a 100644
--- a/tensorflow/contrib/tensorrt/python/trt_convert.py
+++ b/tensorflow/contrib/tensorrt/python/trt_convert.py
@@ -35,7 +35,7 @@ from tensorflow.python.framework import ops
 def CreateInferenceGraph(input_graph_def,
                          outputs,
                          max_batch_size=1,
-                         max_workspace_size=2 << 20):
+                         max_workspace_size_bytes=2 << 20):
   """Python wrapper for the TRT transormation.
 
 
@@ -43,7 +43,7 @@ def CreateInferenceGraph(input_graph_def,
     input_graph_def: GraphDef object containing a model to be transformed.
     outputs: List of tensors or node names for the model outputs.
     max_batch_size: max size for the input batch
-    max_workspace_size: parameter to control memory allocation (in Bytes)
+    max_workspace_size_bytes: parameter to control memory allocation (in Bytes)
 
   Returns:
     New GraphDef with TRTEngineOps placed in graph replacing subgraphs.
@@ -64,7 +64,7 @@ def CreateInferenceGraph(input_graph_def,
   # pair or strings where first one is encoded status and the second
   # one is the transformed graphs protobuf string.
   out = trt_convert(input_graph_def_str, outputs, max_batch_size,
-                    max_workspace_size)
+                    max_workspace_size_bytes)
   status = out[0]
   output_graph_def_string = out[1]
   del input_graph_def_str  #save some memory
diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i
index a7c7e5bc9fa..828b4b35c26 100644
--- a/tensorflow/contrib/tensorrt/trt_conversion.i
+++ b/tensorflow/contrib/tensorrt/trt_conversion.i
@@ -40,7 +40,7 @@ std::pair<string, string> trt_convert(
     string graph_def_string,  // The serialized GraphDef string.
     std::vector<string> output_names,
     size_t max_batch_size,
-    size_t max_workspace_size
+    size_t max_workspace_size_bytes
     // Unfortunately we can't use TF_Status here since it
     // is in c/c_api and brings in a lot of other libraries
    // which in turn declare ops. These ops are included
@@ -68,7 +68,7 @@ std::pair<string, string> trt_convert(
   tensorflow::GraphDef outGraph;
   tensorflow::Status conversion_status =
       tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT(
-          graph_def, output_names, max_batch_size, max_workspace_size,
+          graph_def, output_names, max_batch_size, max_workspace_size_bytes,
           &outGraph);
   if (!conversion_status.ok()) {
     auto retCode = (int)conversion_status.code();
@@ -95,6 +95,6 @@ std::pair<string, string> trt_convert(
 std::pair<string, string> trt_convert(string graph_def_string,
                                       std::vector<string> output_names,
                                       size_t max_batch_size,
-                                      size_t max_workspace_size);
+                                      size_t max_workspace_size_bytes);
 %unignoreall
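---

Note on usage after this rename: callers now pass the TensorRT workspace budget through the max_workspace_size_bytes keyword of trt.CreateInferenceGraph (default 2 << 20 bytes, per the diff above). A minimal sketch under assumed inputs; frozen_graph_def, the batch size of 8, the 1 GiB budget, and the output node name "output" are illustrative placeholders, not part of this change:

import tensorflow as tf
import tensorflow.contrib.tensorrt as trt

# frozen_graph_def: a frozen GraphDef for the model (placeholder here).
trt_gdef = trt.CreateInferenceGraph(
    input_graph_def=frozen_graph_def,
    outputs=["output"],                # name(s) of the model's output node(s)
    max_batch_size=8,                  # engine is planned for this batch size
    max_workspace_size_bytes=1 << 30)  # 1 GiB of scratch memory for TensorRT

tf.reset_default_graph()
tf.import_graph_def(graph_def=trt_gdef)
# ...... run inference

Making the unit explicit in the parameter name matches how the value is consumed: it is forwarded unchanged to trt_builder->setMaxWorkspaceSize(), which takes a byte count.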