[TF:TRT] Ensure that each TensorRT layer has a non-empty name to support xprof.
Extend the layer name format from [main_op_name]_[sub_op_name] to [main_op_name]_[sub_op_name]_[sub_op_instance]. This handles the case where converting a single TF operation to TensorRT requires generating several layers of the same operation type, and the number of such layers is known only at conversion time. For example, a TF node may be transposed in several ways to produce several inputs of a TRTEngineOp. During conversion, we generate several shuffle layers for the TF node, and the names of those shuffle layers differ only in their [sub_op_instance]. This prevents different layers from sharing the same name, which causes TensorRT engine creation to fail. Extend a few utility routines, which may be called multiple times to create TensorRT layers when converting the same TF node, with a parameter that represents the call instance. This allows us to use the instance index as part of the TensorRT layer names to avoid name collisions. Fix the operation converters to set up TensorRT layer names. Modify convert_nodes_test to check that layer names are not empty. PiperOrigin-RevId: 336697580 Change-Id: Ie1f1c80fd9f8da9532e722d84f1bbbeda210ec73
This commit is contained in:
parent
ad144d9bd2
commit
ed5360e8f6
@ -26,6 +26,7 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "absl/algorithm/container.h"
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/memory/memory.h"
|
||||
#include "absl/strings/match.h"
|
||||
#include "absl/strings/str_cat.h"
|
||||
@ -429,24 +430,52 @@ Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Builds the suffix appended to a layer's main name: `sub_op_name` alone when
// no instance index is given, otherwise "<sub_op_name>_<sub_op_instance>".
std::string GetLayerNameSuffix(absl::string_view sub_op_name,
                               absl::optional<int> sub_op_instance) {
  if (!sub_op_instance.has_value()) {
    return std::string(sub_op_name);
  }
  // StrCat formats the integer in decimal, matching std::to_string for int.
  return absl::StrCat(sub_op_name, "_", *sub_op_instance);
}
|
||||
|
||||
// Sets the name of an ILayer using the name of the node_def. If the operation
|
||||
// represented by the ILayer is generated by the converter to support the
|
||||
// conversion of node_def, callers need to specify a non-empty sub_op_name
|
||||
// appended to the name of the node_def to avoid layer name conflicts.
|
||||
// to be appended to the name of node_def to avoid layer name conflicts. If the
|
||||
// operation is generated multiple times, callers also need to specify
|
||||
// sub_op_instance to be appended to the name of the layers to avoid layer name
|
||||
// conflicts.
|
||||
void SetLayerName(nvinfer1::ILayer* layer, const NodeDef& node_def,
|
||||
absl::string_view sub_op_name = "") {
|
||||
if (sub_op_name.empty()) {
|
||||
absl::string_view sub_op_name = "",
|
||||
absl::optional<int> sub_op_instance = absl::nullopt) {
|
||||
std::string sub_op_suffix = GetLayerNameSuffix(sub_op_name, sub_op_instance);
|
||||
if (sub_op_suffix.empty()) {
|
||||
layer->setName(node_def.name().c_str());
|
||||
} else {
|
||||
layer->setName(absl::StrCat(node_def.name(), "-", sub_op_name).c_str());
|
||||
layer->setName(absl::StrCat(node_def.name(), "-", sub_op_suffix).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the name of an ILayer using the format of
|
||||
// "main_op_name"_"sub_op_name"_"sub_op_instance".
|
||||
void SetLayerName(nvinfer1::ILayer* layer, absl::string_view main_op_name,
|
||||
absl::string_view sub_op_name,
|
||||
absl::optional<int> sub_op_instance = absl::nullopt) {
|
||||
std::string layer_name_suffix =
|
||||
GetLayerNameSuffix(sub_op_name, sub_op_instance);
|
||||
layer->setName(absl::StrCat(main_op_name, "-", layer_name_suffix).c_str());
|
||||
}
|
||||
|
||||
nvinfer1::ITensor* Converter::CreateConstantLayer(
|
||||
const TRT_ShapedWeights& weights, const nvinfer1::Dims& dims) {
|
||||
nvinfer1::Weights trt_weights = weights.GetTrtWeights();
|
||||
nvinfer1::IConstantLayer* layer = network()->addConstant(dims, trt_weights);
|
||||
if (!layer) return nullptr;
|
||||
SetLayerName(layer, "_tftrt_constant_",
|
||||
std::to_string(next_constant_layer_id_));
|
||||
next_constant_layer_id_++;
|
||||
nvinfer1::ITensor* trt_tensor = layer->getOutput(0);
|
||||
#if !IS_TRT_VERSION_GE(5, 1, 3, 0)
|
||||
// TODO(laigd): there is a bug in TensorRT 5.0 library that, if we don't set
|
||||
@ -1326,6 +1355,7 @@ Status Converter::AddInputTensor(const string& name, nvinfer1::DataType dtype,
|
||||
|
||||
Status Converter::RenameAndMarkOutputTensors(
|
||||
const std::vector<Converter::EngineOutputInfo>& output_tensors) {
|
||||
int output_index = 0;
|
||||
for (const auto& output : output_tensors) {
|
||||
TRT_TensorOrWeights tensor_or_weights;
|
||||
TF_RETURN_IF_ERROR(
|
||||
@ -1354,6 +1384,7 @@ Status Converter::RenameAndMarkOutputTensors(
|
||||
nvinfer1::IShuffleLayer* layer = network()->addShuffle(*tensor);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(
|
||||
layer, StrCat("Output Copy for ", tensor->getName()));
|
||||
SetLayerName(layer, tensor->getName(), "shuffle", output_index);
|
||||
MarkQuantizationRangesAsInferrable(tensor, layer->getOutput(0));
|
||||
tensor = layer->getOutput(0);
|
||||
}
|
||||
@ -1362,6 +1393,7 @@ Status Converter::RenameAndMarkOutputTensors(
|
||||
// Set type after marking as output. TRT only supports setType for engine
|
||||
// outputs and inputs (type is inferred otherwise).
|
||||
tensor->setType(output.trt_dtype);
|
||||
output_index++;
|
||||
VLOG(1) << "Marking output TRT tensor " << output.source_tensor_name
|
||||
<< " with data type " << DebugString(output.trt_dtype)
|
||||
<< ", which feeds TF node " << output.dest_node_name;
|
||||
@ -1570,7 +1602,9 @@ Status Converter::GetWeightRange(const TRT_ShapedWeights& weights,
|
||||
Status Converter::PrepareTensorForShape(const TRT_TensorOrWeights& input,
|
||||
const nvinfer1::Dims& dims,
|
||||
const bool validation_only,
|
||||
nvinfer1::ITensor** tensor) {
|
||||
nvinfer1::ITensor** tensor,
|
||||
const NodeDef& node_def,
|
||||
absl::optional<int> op_instance) {
|
||||
const nvinfer1::Dims input_dims = input.GetTrtDims();
|
||||
// If one of input_dims and dims doesn't have static shape, it means some of
|
||||
// the dims are unknown or need to be inferred. And we don't do further checks
|
||||
@ -1601,6 +1635,7 @@ Status Converter::PrepareTensorForShape(const TRT_TensorOrWeights& input,
|
||||
nvinfer1::IShuffleLayer* layer =
|
||||
this->network()->addShuffle(*input.tensor());
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, "TF-TRT Internal Reshape");
|
||||
SetLayerName(layer, node_def, "shuffle", op_instance);
|
||||
layer->setReshapeDimensions(dims);
|
||||
MarkQuantizationRangesAsInferrable(input.tensor(), layer->getOutput(0));
|
||||
*tensor = layer->getOutput(0);
|
||||
@ -2101,6 +2136,7 @@ Status Conv2DPaddingHelper(OpConverterParams* params, const TFAttrs& attrs,
|
||||
*tensor, nvinfer1::DimsHW((*padding)[0].first, (*padding)[1].first),
|
||||
nvinfer1::DimsHW((*padding)[0].second, (*padding)[1].second));
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, params->node_def.name());
|
||||
SetLayerName(pad_layer, params->node_def, "pad");
|
||||
params->converter->MarkQuantizationRangesAsInferrable(
|
||||
tensor, pad_layer->getOutput(0));
|
||||
*padding = {{0, 0}, {0, 0}};
|
||||
@ -2267,7 +2303,6 @@ Status ConvertConv2DHelper(OpConverterParams* params, int group,
|
||||
#else
|
||||
layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
|
||||
#endif
|
||||
SetLayerName(layer, node_def);
|
||||
layer->setNbGroups(num_groups);
|
||||
conv_layer = layer;
|
||||
} else {
|
||||
@ -2284,11 +2319,11 @@ Status ConvertConv2DHelper(OpConverterParams* params, int group,
|
||||
#else
|
||||
layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
|
||||
#endif
|
||||
SetLayerName(layer, node_def);
|
||||
layer->setNbGroups(num_groups);
|
||||
layer->setDilation(dilation);
|
||||
conv_layer = layer;
|
||||
}
|
||||
SetLayerName(conv_layer, node_def, "conv");
|
||||
nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
|
||||
// Add an extra padding for Deconv because TRT doesn't accept the
|
||||
// argument output_shape and thus the TRT output shape could be wrong
|
||||
@ -2321,6 +2356,7 @@ Status ConvertConv2DHelper(OpConverterParams* params, int group,
|
||||
params->converter->network()->addPadding(*output_tensor, pre_padding,
|
||||
post_padding);
|
||||
output_tensor = padding_layer->getOutput(0);
|
||||
SetLayerName(padding_layer, node_def, "pad");
|
||||
}
|
||||
}
|
||||
// Restore transpose.
|
||||
@ -2415,6 +2451,7 @@ Status ConvertShape(OpConverterParams* params) {
|
||||
nvinfer1::IShapeLayer* shape_layer =
|
||||
params->converter->network()->addShape(*inputs.at(0).tensor());
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(shape_layer, params->node_def.name());
|
||||
SetLayerName(shape_layer, params->node_def, "shape");
|
||||
params->outputs->push_back(TRT_TensorOrWeights(shape_layer->getOutput(0)));
|
||||
return Status::OK();
|
||||
#else
|
||||
@ -2485,7 +2522,7 @@ Status ConvertReshape(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* output_tensor = nullptr;
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
input_tensor, output_nonbatch_dims, params->validation_only,
|
||||
&output_tensor));
|
||||
&output_tensor, params->node_def));
|
||||
if (params->validation_only) return Status::OK();
|
||||
|
||||
// Record the conversion result.
|
||||
@ -2528,7 +2565,8 @@ Status ConvertExpandDims(OpConverterParams* params) {
|
||||
nvinfer1::Dims new_dims;
|
||||
TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims));
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
input_tensor, new_dims, /*validation_only=*/false, &output_tensor));
|
||||
input_tensor, new_dims, /*validation_only=*/false, &output_tensor,
|
||||
params->node_def));
|
||||
}
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
return Status::OK();
|
||||
@ -2538,7 +2576,8 @@ Status Converter::DynamicReshape(nvinfer1::ITensor* input,
|
||||
std::vector<std::pair<int, int>> slices,
|
||||
OpConverterParams* params,
|
||||
nvinfer1::ITensor** output,
|
||||
std::vector<int> size_for_added_dims) {
|
||||
std::vector<int> size_for_added_dims,
|
||||
absl::optional<int> op_instance) {
|
||||
*output = nullptr;
|
||||
// DynamicReshape relies on INetworkDefinition::addShape that was introduced
|
||||
// in TensorRT 6.
|
||||
@ -2550,9 +2589,11 @@ Status Converter::DynamicReshape(nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* shape = network()->addShape(*input)->getOutput(0);
|
||||
// Build new shape = shape[:trt_axis] + [1] + shape[trt_axis:]
|
||||
std::vector<nvinfer1::ITensor const*> concat_inputs;
|
||||
for (int i = 0; i < std::max(slices.size(), size_for_added_dims.size());
|
||||
i++) {
|
||||
int max_num_slices = std::max(slices.size(), size_for_added_dims.size());
|
||||
int op_instance_value = op_instance.has_value() ? op_instance.value() : 0;
|
||||
for (int i = 0; i < max_num_slices; i++) {
|
||||
nvinfer1::ITensor* tensor;
|
||||
// Map each (op_instance, i) pair to a distinct index. Since i < max_num_slices,
// scaling op_instance (rather than i) by max_num_slices keeps the indices of
// different call instances from overlapping, provided the calls made for the
// same node use the same max_num_slices.
int slice_instance = op_instance_value * max_num_slices + i;
|
||||
// maybe_add_a_dimension(i);
|
||||
if (i < size_for_added_dims.size() && size_for_added_dims[i] >= 0) {
|
||||
TF_RETURN_IF_ERROR(
|
||||
@ -2560,11 +2601,11 @@ Status Converter::DynamicReshape(nvinfer1::ITensor* input,
|
||||
concat_inputs.push_back(tensor);
|
||||
}
|
||||
if (i < slices.size()) {
|
||||
concat_inputs.push_back(
|
||||
network()
|
||||
->addSlice(*shape, {1, {slices[i].first}},
|
||||
{1, {slices[i].second - slices[i].first}}, {1, {1}})
|
||||
->getOutput(0));
|
||||
nvinfer1::ISliceLayer* slice_layer = network()->addSlice(
|
||||
*shape, {1, {slices[i].first}},
|
||||
{1, {slices[i].second - slices[i].first}}, {1, {1}});
|
||||
concat_inputs.push_back(slice_layer->getOutput(0));
|
||||
SetLayerName(slice_layer, params->node_def, "slice", slice_instance);
|
||||
}
|
||||
}
|
||||
nvinfer1::IConcatenationLayer* concat_layer = network()->addConcatenation(
|
||||
@ -2574,6 +2615,7 @@ Status Converter::DynamicReshape(nvinfer1::ITensor* input,
|
||||
nvinfer1::ITensor* new_shape = concat_layer->getOutput(0);
|
||||
// Reshape input using new shape
|
||||
nvinfer1::IShuffleLayer* shuffle = network()->addShuffle(*input);
|
||||
SetLayerName(shuffle, params->node_def, "shuffle", op_instance);
|
||||
shuffle->setInput(1, *new_shape);
|
||||
*output = shuffle->getOutput(0);
|
||||
return Status::OK();
|
||||
@ -2586,7 +2628,8 @@ Status Converter::DynamicReshape(nvinfer1::ITensor* input,
|
||||
Status Converter::DynamicExpandDims(nvinfer1::ITensor* input,
|
||||
const nvinfer1::Dims& dims, int axis,
|
||||
OpConverterParams* params,
|
||||
nvinfer1::ITensor** output) {
|
||||
nvinfer1::ITensor** output,
|
||||
absl::optional<int> op_instance) {
|
||||
if (params->validation_only) {
|
||||
*output = nullptr;
|
||||
return errors::Internal(
|
||||
@ -2602,7 +2645,7 @@ Status Converter::DynamicExpandDims(nvinfer1::ITensor* input,
|
||||
if (axis != dims.nbDims) {
|
||||
slices.push_back(std::pair<int, int>{axis, dims.nbDims});
|
||||
}
|
||||
return DynamicReshape(input, slices, params, output, extra_dims);
|
||||
return DynamicReshape(input, slices, params, output, extra_dims, op_instance);
|
||||
}
|
||||
|
||||
Status Converter::SqueezeTensor(nvinfer1::ITensor* input,
|
||||
@ -2630,7 +2673,8 @@ Status Converter::SqueezeTensor(nvinfer1::ITensor* input,
|
||||
VLOG(2) << "input_dims" << input_dims;
|
||||
TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(*input_dims, &new_dims));
|
||||
TF_RETURN_IF_ERROR(PrepareTensorForShape(TRT_TensorOrWeights(input), new_dims,
|
||||
/*validation_only=*/false, output));
|
||||
/*validation_only=*/false, output,
|
||||
params->node_def));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -2694,11 +2738,11 @@ Status ConvertSqueeze(OpConverterParams* params) {
|
||||
}
|
||||
|
||||
template <typename Container>
|
||||
Status ConvertStridedSliceHelper(OpConverterParams* params,
|
||||
const TRT_TensorOrWeights& input,
|
||||
Container begin, Container size,
|
||||
const Container& stride,
|
||||
const nvinfer1::Dims* final_shape = nullptr) {
|
||||
Status ConvertStridedSliceHelper(
|
||||
OpConverterParams* params, const TRT_TensorOrWeights& input,
|
||||
Container begin, Container size, const Container& stride,
|
||||
const nvinfer1::Dims* final_shape = nullptr,
|
||||
absl::optional<int> op_instance = absl::nullopt) {
|
||||
const auto& node_def = params->node_def;
|
||||
// Get input dims.
|
||||
nvinfer1::Dims dims = input.GetTrtDims();
|
||||
@ -2723,6 +2767,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
|
||||
node_def.op(), ", at ", node_def.name());
|
||||
}
|
||||
}
|
||||
|
||||
// TRT 5.1 adds ISliceLayer. For older versions, we attempt to use the
|
||||
// padding layer with negative padding.
|
||||
#if IS_TRT_VERSION_GE(5, 1, 3, 1)
|
||||
@ -2737,12 +2782,13 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
|
||||
|
||||
nvinfer1::ISliceLayer* layer = params->converter->network()->addSlice(
|
||||
*input.tensor(), begin_dims, size_dims, stride_dims);
|
||||
SetLayerName(layer, params->node_def, "slice", op_instance);
|
||||
nvinfer1::ITensor* tensor = layer->getOutput(0);
|
||||
// Reshape for shrink_axis.
|
||||
if (final_shape) {
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(tensor), *final_shape, /*validation_only=*/false,
|
||||
&tensor));
|
||||
&tensor, node_def, op_instance));
|
||||
}
|
||||
params->outputs->push_back(TRT_TensorOrWeights(tensor));
|
||||
return Status::OK();
|
||||
@ -2796,6 +2842,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
|
||||
if (params->validation_only) return Status::OK();
|
||||
nvinfer1::IShuffleLayer* layer =
|
||||
params->converter->network()->addShuffle(*input.tensor());
|
||||
SetLayerName(layer, params->node_def, "shuffle", op_instance);
|
||||
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
|
||||
return Status::OK();
|
||||
} else if (pad_dims.size() == 1) {
|
||||
@ -2844,29 +2891,32 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
|
||||
nvinfer1::ITensor* tensor = input.tensor();
|
||||
if (need_reshape) {
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
input, reshape_dims, /*validation_only=*/false, &tensor));
|
||||
input, reshape_dims, /*validation_only=*/false, &tensor, node_def,
|
||||
op_instance));
|
||||
}
|
||||
if (need_transpose) {
|
||||
TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
|
||||
tensor, transpose_order, &tensor, node_def, "for_pad"));
|
||||
tensor, transpose_order, &tensor, node_def, "for_pad", op_instance));
|
||||
}
|
||||
// Add padding layer
|
||||
nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
|
||||
*tensor, pre_padding, post_padding);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, params->node_def, "pad");
|
||||
params->converter->MarkQuantizationRangesAsInferrable(tensor,
|
||||
layer->getOutput(0));
|
||||
tensor = layer->getOutput(0);
|
||||
// Restore transpose
|
||||
if (need_transpose) {
|
||||
TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
|
||||
tensor, inv_transpose_order, &tensor, node_def, "after_pad"));
|
||||
TF_RETURN_IF_ERROR(
|
||||
params->converter->TransposeTensor(tensor, inv_transpose_order, &tensor,
|
||||
node_def, "after_pad", op_instance));
|
||||
}
|
||||
// Reshape for shrink_axis.
|
||||
if (final_shape) {
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(tensor), *final_shape, /*validation_only=*/false,
|
||||
&tensor));
|
||||
&tensor, node_def, op_instance));
|
||||
} else if (need_reshape) {
|
||||
// Restore reshape.
|
||||
// Calculate output dimensions
|
||||
@ -2889,7 +2939,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
|
||||
/*ignore_first_dim=*/true));
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(tensor), new_dims, /*validation_only=*/false,
|
||||
&tensor));
|
||||
&tensor, node_def, op_instance));
|
||||
}
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(tensor));
|
||||
@ -3218,7 +3268,6 @@ Status ConvertConv3DHelper(OpConverterParams* params, int group,
|
||||
layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
|
||||
}
|
||||
|
||||
SetLayerName(layer, node_def);
|
||||
layer->setNbGroups(num_groups);
|
||||
conv_layer = layer;
|
||||
} else {
|
||||
@ -3234,11 +3283,11 @@ Status ConvertConv3DHelper(OpConverterParams* params, int group,
|
||||
layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
|
||||
}
|
||||
|
||||
SetLayerName(layer, node_def);
|
||||
layer->setNbGroups(num_groups);
|
||||
layer->setDilationNd(dilation_dhw);
|
||||
conv_layer = layer;
|
||||
}
|
||||
SetLayerName(conv_layer, node_def, "conv");
|
||||
nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
|
||||
|
||||
// Restore transpose.
|
||||
@ -3334,7 +3383,7 @@ Status ConvertPool3D(OpConverterParams* params) {
|
||||
// SAME_UPPER means that post padding is preferred.
|
||||
layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
|
||||
}
|
||||
SetLayerName(layer, node_def);
|
||||
SetLayerName(layer, node_def, "pooling");
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
|
||||
if (data_format == "NDHWC") {
|
||||
@ -3491,7 +3540,7 @@ Status ConvertFusedConv2DBiasActivation(OpConverterParams* params) {
|
||||
#else
|
||||
conv_layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
|
||||
#endif
|
||||
SetLayerName(conv_layer, node_def);
|
||||
SetLayerName(conv_layer, node_def, "conv");
|
||||
conv_layer->setNbGroups(1);
|
||||
conv_layer->setDilation(dilation);
|
||||
nvinfer1::ITensor* output_tensor = conv_layer->getOutput(0);
|
||||
@ -3502,6 +3551,7 @@ Status ConvertFusedConv2DBiasActivation(OpConverterParams* params) {
|
||||
params->converter->network()->addActivation(*output_tensor,
|
||||
op_pair->second);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(activation_layer, node_def.name());
|
||||
SetLayerName(activation_layer, node_def, "activation");
|
||||
output_tensor = activation_layer->getOutput(0);
|
||||
}
|
||||
// Restore transpose.
|
||||
@ -3583,6 +3633,7 @@ Status ConvertPool(OpConverterParams* params) {
|
||||
*tensor, nvinfer1::DimsHW(padding[0].first, padding[1].first),
|
||||
nvinfer1::DimsHW(padding[0].second, padding[1].second));
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, node_def.name());
|
||||
SetLayerName(pad_layer, node_def, "pad");
|
||||
params->converter->MarkQuantizationRangesAsInferrable(
|
||||
tensor, pad_layer->getOutput(0));
|
||||
padding = {{0, 0}, {0, 0}};
|
||||
@ -3612,7 +3663,7 @@ Status ConvertPool(OpConverterParams* params) {
|
||||
#else
|
||||
layer->setPadding(nvinfer1::DimsHW{padding[0].first, padding[1].first});
|
||||
#endif
|
||||
SetLayerName(layer, node_def);
|
||||
SetLayerName(layer, node_def, "pooling");
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
|
||||
if (data_format == "NHWC") {
|
||||
@ -3640,6 +3691,7 @@ Status ConvertLeakyRelu(OpConverterParams* params) {
|
||||
params->converter->network()->addActivation(
|
||||
*inputs.at(0).tensor(), nvinfer1::ActivationType::kLEAKY_RELU);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "activation");
|
||||
layer->setAlpha(alpha);
|
||||
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
|
||||
return Status::OK();
|
||||
@ -3662,12 +3714,14 @@ Status ConvertLeakyRelu(OpConverterParams* params) {
|
||||
params->converter->network()->addElementWise(
|
||||
*tensor, *const_alpha_tensor, nvinfer1::ElementWiseOperation::kPROD);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(mul_layer, node_def.name());
|
||||
SetLayerName(mul_layer, node_def, "mul");
|
||||
// max(x, alpha * x)
|
||||
nvinfer1::IElementWiseLayer* max_layer =
|
||||
params->converter->network()->addElementWise(
|
||||
*tensor, *mul_layer->getOutput(0),
|
||||
nvinfer1::ElementWiseOperation::kMAX);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(max_layer, node_def.name());
|
||||
// Name the max layer itself; mul_layer was already named "mul" above.
SetLayerName(max_layer, node_def, "max");
|
||||
nvinfer1::ITensor* output_tensor = max_layer->getOutput(0);
|
||||
params->converter->MarkQuantizationRangesAsInferrable(
|
||||
output_tensor, mul_layer->getOutput(0));
|
||||
@ -3712,6 +3766,7 @@ Status ConvertClipByValue(OpConverterParams* params) {
|
||||
layer->setAlpha(clip_value_min);
|
||||
layer->setBeta(clip_value_max);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "activation");
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
params->converter->ProvideQuantizationRange(output_tensor, clip_value_min,
|
||||
clip_value_max);
|
||||
@ -3755,7 +3810,7 @@ Status ConvertActivation(OpConverterParams* params) {
|
||||
params->converter->network()->addActivation(*inputs.at(0).tensor(),
|
||||
op_pair->second);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
SetLayerName(layer, node_def, "activation");
|
||||
// Set parameters.
|
||||
#if IS_TRT_VERSION_GE(5, 1, 2, 0)
|
||||
if (node_def.op() == "Elu") {
|
||||
@ -3859,7 +3914,7 @@ Status ConvertRelu6(OpConverterParams* params) {
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
layer->setAlpha(0.0f);
|
||||
layer->setBeta(6.0f);
|
||||
SetLayerName(layer, node_def);
|
||||
SetLayerName(layer, node_def, "activation");
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
@ -3874,6 +3929,7 @@ Status ConvertRelu6(OpConverterParams* params) {
|
||||
params->converter->network()->addActivation(
|
||||
*tensor, nvinfer1::ActivationType::kRELU);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(relu_layer, node_def.name());
|
||||
SetLayerName(relu_layer, node_def, "activation");
|
||||
|
||||
// Large range of relu is problematic during quantization in INT8 precision
|
||||
// mode. Setting dynamic range of relu = [0.f, 6.0f] helps with quantization.
|
||||
@ -3895,6 +3951,7 @@ Status ConvertRelu6(OpConverterParams* params) {
|
||||
*relu_layer->getOutput(0), *const6_tensor,
|
||||
nvinfer1::ElementWiseOperation::kMIN);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(relu6_layer, node_def.name());
|
||||
SetLayerName(relu6_layer, node_def, "min");
|
||||
nvinfer1::ITensor* output_tensor = relu6_layer->getOutput(0);
|
||||
params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
|
||||
|
||||
@ -3939,6 +3996,7 @@ Status ConvertBiasAddInt8WithoutCalibration(OpConverterParams* params) {
|
||||
nvinfer1::IShuffleLayer* shuffle_layer =
|
||||
params->converter->network()->addShuffle(*tensor);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name());
|
||||
SetLayerName(shuffle_layer, node_def, "shuffle", /*op_instance=*/0);
|
||||
params->converter->MarkQuantizationRangesAsInferrable(
|
||||
tensor, shuffle_layer->getOutput(0));
|
||||
|
||||
@ -3970,6 +4028,7 @@ Status ConvertBiasAddInt8WithoutCalibration(OpConverterParams* params) {
|
||||
*tensor, mode, weights.GetTrtWeights(), empty_weights.GetTrtWeights(),
|
||||
empty_weights.GetTrtWeights());
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "scale");
|
||||
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
|
||||
@ -3978,6 +4037,7 @@ Status ConvertBiasAddInt8WithoutCalibration(OpConverterParams* params) {
|
||||
nvinfer1::IShuffleLayer* shuffle_layer =
|
||||
params->converter->network()->addShuffle(*output_tensor);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name());
|
||||
SetLayerName(shuffle_layer, node_def, "shuffle", /*op_instance=*/1);
|
||||
// NOTE: for same reason as mentioned above we need to apply the reshape
|
||||
// unconditionally.
|
||||
nvinfer1::Dims reshape_dims = original_dims;
|
||||
@ -4062,13 +4122,16 @@ Status ConvertBiasAdd(OpConverterParams* params) {
|
||||
// Convert input to a TRT tensor
|
||||
nvinfer1::ITensor* input_tensor{nullptr};
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
inputs.at(0), input_shape, params->validation_only, &input_tensor));
|
||||
inputs.at(0), input_shape, params->validation_only, &input_tensor,
|
||||
node_def,
|
||||
/*op_instance=*/0));
|
||||
|
||||
// Finally, reshape bias. Since the bias is usually a constant, this will
|
||||
// normally happen at conversion-time.
|
||||
nvinfer1::ITensor* bias_tensor{nullptr};
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
inputs.at(1), bias_shape, params->validation_only, &bias_tensor));
|
||||
inputs.at(1), bias_shape, params->validation_only, &bias_tensor, node_def,
|
||||
/*op_instance=*/1));
|
||||
VLOG(2) << "Bias shape adjusted to " << DebugString(bias_shape);
|
||||
|
||||
if (params->validation_only) return Status::OK();
|
||||
@ -4077,6 +4140,7 @@ Status ConvertBiasAdd(OpConverterParams* params) {
|
||||
params->converter->network()->addElementWise(
|
||||
*input_tensor, *bias_tensor, nvinfer1::ElementWiseOperation::kSUM);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "sum");
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
@ -4305,9 +4369,11 @@ Status ConvertBinary(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* tensor_r = nullptr;
|
||||
// This will also convert constants to tensors, and set quantization ranges.
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
operand_l, broadcasted_dims_l, params->validation_only, &tensor_l));
|
||||
operand_l, broadcasted_dims_l, params->validation_only, &tensor_l,
|
||||
node_def, /*op_instance=*/0));
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
operand_r, broadcasted_dims_r, params->validation_only, &tensor_r));
|
||||
operand_r, broadcasted_dims_r, params->validation_only, &tensor_r,
|
||||
node_def, /*op_instance=*/1));
|
||||
if (params->validation_only) return Status::OK();
|
||||
|
||||
// Add ElementWise layer.
|
||||
@ -4322,6 +4388,7 @@ Status ConvertBinary(OpConverterParams* params) {
|
||||
layer = params->converter->network()->addUnary(
|
||||
*trt_tensor, nvinfer1::UnaryOperation::kFLOOR);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "floor");
|
||||
trt_tensor = layer->getOutput(0);
|
||||
}
|
||||
#endif
|
||||
@ -4360,10 +4427,12 @@ Status ConvertRsqrt(OpConverterParams* params) {
|
||||
nvinfer1::IUnaryLayer* sqrt_layer = params->converter->network()->addUnary(
|
||||
*tensor, nvinfer1::UnaryOperation::kSQRT);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(sqrt_layer, node_def.name());
|
||||
SetLayerName(sqrt_layer, node_def, "sqrt");
|
||||
// Recip
|
||||
nvinfer1::IUnaryLayer* recip_layer = params->converter->network()->addUnary(
|
||||
*sqrt_layer->getOutput(0), nvinfer1::UnaryOperation::kRECIP);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(recip_layer, node_def.name());
|
||||
SetLayerName(recip_layer, node_def, "recip");
|
||||
params->outputs->push_back(TRT_TensorOrWeights(recip_layer->getOutput(0)));
|
||||
return Status::OK();
|
||||
}
|
||||
@ -4460,6 +4529,7 @@ Status ConvertSquare(OpConverterParams* params) {
|
||||
*inputs.at(0).tensor(), *const2_tensor,
|
||||
nvinfer1::ElementWiseOperation::kPOW);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
@ -4518,6 +4588,7 @@ Status ConvertReduce(OpConverterParams* params) {
|
||||
nvinfer1::ILayer* layer = params->converter->network()->addReduce(
|
||||
*tensor, reduce_operation, axes, keep_dims);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
|
||||
return Status::OK();
|
||||
@ -4592,21 +4663,25 @@ Status ConvertPack(OpConverterParams* params) {
|
||||
nvinfer1::Dims expanded_dims;
|
||||
TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(tensor_dims, &expanded_dims));
|
||||
std::vector<nvinfer1::ITensor*> expanded_tensors;
|
||||
int input_index = 0;
|
||||
for (const TRT_TensorOrWeights& input : inputs) {
|
||||
nvinfer1::ITensor* expanded_tensor = nullptr;
|
||||
if (input.is_tensor() && !params->use_implicit_batch &&
|
||||
!HasStaticShape(dims)) {
|
||||
if (!params->validation_only) {
|
||||
TF_RETURN_IF_ERROR(params->converter->DynamicExpandDims(
|
||||
input.tensor(), dims, trt_axis, params, &expanded_tensor));
|
||||
input.tensor(), dims, trt_axis, params, &expanded_tensor,
|
||||
input_index));
|
||||
}
|
||||
} else {
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
input, expanded_dims, params->validation_only, &expanded_tensor));
|
||||
input, expanded_dims, params->validation_only, &expanded_tensor,
|
||||
node_def, input_index));
|
||||
}
|
||||
if (!params->validation_only) {
|
||||
expanded_tensors.push_back(expanded_tensor);
|
||||
}
|
||||
input_index++;
|
||||
}
|
||||
if (params->validation_only) return Status::OK();
|
||||
|
||||
@ -4622,6 +4697,7 @@ Status ConvertPack(OpConverterParams* params) {
|
||||
const_cast<nvinfer1::ITensor**>(expanded_tensors.data()),
|
||||
expanded_tensors.size());
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "concat");
|
||||
// Note that trt_axis stays the same even after expanding tensors at the axis.
|
||||
layer->setAxis(trt_axis);
|
||||
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
|
||||
@ -4721,6 +4797,7 @@ Status ConvertPad(OpConverterParams* params) {
|
||||
nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
|
||||
*tensor, pre_padding, post_padding);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
params->converter->MarkQuantizationRangesAsInferrable(tensor, output_tensor);
|
||||
|
||||
@ -4786,7 +4863,7 @@ Status ConvertSplitHelper(OpConverterParams* params,
|
||||
for (int i = 0; i < num_splits; ++i) {
|
||||
begin[trt_axis + 1] = i * split_size_on_axis;
|
||||
TF_RETURN_IF_ERROR(ConvertStridedSliceHelper(
|
||||
params, input, begin, size, stride, final_shape_for_unpack_ptr));
|
||||
params, input, begin, size, stride, final_shape_for_unpack_ptr, i));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -4860,6 +4937,7 @@ Status ConvertCast(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* input = params->inputs.at(0).tensor();
|
||||
nvinfer1::IIdentityLayer* layer =
|
||||
params->converter->network()->addIdentity(*input);
|
||||
SetLayerName(layer, node_def);
|
||||
layer->setPrecision(nvinfer1::DataType::kFLOAT);
|
||||
|
||||
if (layer->getOutput(0)->getType() != nvinfer1::DataType::kFLOAT) {
|
||||
@ -4917,6 +4995,7 @@ Status ConvertConcat(OpConverterParams* params) {
|
||||
const_cast<nvinfer1::ITensor* const*>(input_tensors.data()),
|
||||
input_tensors.size());
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
layer->setAxis(trt_axis);
|
||||
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
|
||||
return Status::OK();
|
||||
@ -5143,6 +5222,7 @@ Status ConvertGather(OpConverterParams* params) {
|
||||
nvinfer1::IGatherLayer* layer = params->converter->network()->addGather(
|
||||
*params_tensor, *indices_input.tensor(), trt_axis);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
nvinfer1::Dims trt_gather_output_dims = output_tensor->getDimensions();
|
||||
@ -5169,7 +5249,7 @@ Status ConvertGather(OpConverterParams* params) {
|
||||
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(output_tensor), trt_gather_output_dims,
|
||||
/*validation_only=*/false, &output_tensor));
|
||||
/*validation_only=*/false, &output_tensor, node_def));
|
||||
}
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
@ -5179,7 +5259,7 @@ Status ConvertGather(OpConverterParams* params) {
|
||||
Status ConvertFullyConnectedHelper(OpConverterParams* params,
|
||||
nvinfer1::ITensor* tensor_a,
|
||||
TRT_ShapedWeights weights_b,
|
||||
bool transpose_b, const string& node_name) {
|
||||
bool transpose_b, const NodeDef& node_def) {
|
||||
// Reshape input to 3D - this will be a no-op unless using int8 precision.
|
||||
auto input_dim = tensor_a->getDimensions();
|
||||
while (input_dim.nbDims < 3) {
|
||||
@ -5187,7 +5267,7 @@ Status ConvertFullyConnectedHelper(OpConverterParams* params,
|
||||
}
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(tensor_a), input_dim, /*validation_only=*/false,
|
||||
&tensor_a));
|
||||
&tensor_a, node_def, /*op_instance=*/0));
|
||||
|
||||
// FC layer will transpose weights, so we need to pre-transpose.
|
||||
TRT_ShapedWeights weights(weights_b.TrtDType());
|
||||
@ -5203,7 +5283,8 @@ Status ConvertFullyConnectedHelper(OpConverterParams* params,
|
||||
params->converter->network()->addFullyConnected(
|
||||
*tensor_a, noutput, weights.GetTrtWeights(), biases.GetTrtWeights());
|
||||
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
|
||||
// Reshape output to 1D - this will be a no-op unless using int8 precision.
|
||||
@ -5211,7 +5292,7 @@ Status ConvertFullyConnectedHelper(OpConverterParams* params,
|
||||
output_dim.nbDims = 1;
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(output_tensor), output_dim, /*validation_only=*/false,
|
||||
&output_tensor));
|
||||
&output_tensor, node_def, /*op_instance=*/1));
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
return Status::OK();
|
||||
@ -5220,7 +5301,7 @@ Status ConvertFullyConnectedHelper(OpConverterParams* params,
|
||||
Status ConvertMatMulHelper(OpConverterParams* params,
|
||||
TRT_TensorOrWeights input_a,
|
||||
TRT_TensorOrWeights input_b, bool transpose_a,
|
||||
bool transpose_b, string node_name) {
|
||||
bool transpose_b, const NodeDef& node_def) {
|
||||
// TODO: ReorderCKtoKC is currently not general enough to transpose weights
|
||||
// that are not 2D.
|
||||
if ((transpose_a && input_a.is_weights() &&
|
||||
@ -5258,7 +5339,7 @@ Status ConvertMatMulHelper(OpConverterParams* params,
|
||||
if (should_use_fc || (can_use_fc && params->converter->precision_mode() ==
|
||||
TrtPrecisionMode::INT8)) {
|
||||
return ConvertFullyConnectedHelper(
|
||||
params, input_a.tensor(), input_b.weights(), transpose_b, node_name);
|
||||
params, input_a.tensor(), input_b.weights(), transpose_b, node_def);
|
||||
}
|
||||
|
||||
const auto get_matrix_op = [](nvinfer1::ITensor* in,
|
||||
@ -5299,7 +5380,8 @@ Status ConvertMatMulHelper(OpConverterParams* params,
|
||||
*tensor_a, get_matrix_op(tensor_a, transpose_a), *tensor_b,
|
||||
get_matrix_op(tensor_b, transpose_b));
|
||||
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
return Status::OK();
|
||||
@ -5322,7 +5404,7 @@ Status ConvertMatMul(OpConverterParams* params) {
|
||||
bool transpose_b = attrs.get<bool>("transpose_b");
|
||||
|
||||
return ConvertMatMulHelper(params, inputs.at(0), inputs.at(1), transpose_a,
|
||||
transpose_b, node_def.name());
|
||||
transpose_b, node_def);
|
||||
}
|
||||
|
||||
Status ConvertBatchMatMul(OpConverterParams* params) {
|
||||
@ -5385,14 +5467,16 @@ Status ConvertBatchMatMul(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* tensor_l = nullptr;
|
||||
nvinfer1::ITensor* tensor_r = nullptr;
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l));
|
||||
inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l,
|
||||
node_def));
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r));
|
||||
inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r,
|
||||
node_def));
|
||||
if (params->validation_only) return Status::OK();
|
||||
|
||||
return ConvertMatMulHelper(params, TRT_TensorOrWeights(tensor_l),
|
||||
TRT_TensorOrWeights(tensor_r), transpose_a,
|
||||
transpose_b, node_def.name());
|
||||
transpose_b, node_def);
|
||||
}
|
||||
|
||||
Status ConvertSoftmax(OpConverterParams* params) {
|
||||
@ -5414,6 +5498,7 @@ Status ConvertSoftmax(OpConverterParams* params) {
|
||||
nvinfer1::ISoftMaxLayer* layer =
|
||||
params->converter->network()->addSoftMax(*tensor);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
// Tensorflow SoftMax assumes applying softmax on the last dimension.
|
||||
layer->setAxes(1 << (num_trt_dims - 1));
|
||||
|
||||
@ -5458,6 +5543,7 @@ Status ConvertArgMinMax(OpConverterParams* params) {
|
||||
nvinfer1::ITopKLayer* layer = params->converter->network()->addTopK(
|
||||
*inputs.at(0).tensor(), topk_op, 1, reduce_axes);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "topk");
|
||||
nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1);
|
||||
|
||||
// Squeeze on axis.
|
||||
@ -5468,7 +5554,7 @@ Status ConvertArgMinMax(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* output_tensor = nullptr;
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(output_indices_tensor), new_dims,
|
||||
/*validation_only=*/false, &output_tensor));
|
||||
/*validation_only=*/false, &output_tensor, node_def));
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
|
||||
return Status::OK();
|
||||
@ -5514,6 +5600,7 @@ Status ConvertTopK(OpConverterParams* params) {
|
||||
nvinfer1::ITopKLayer* layer =
|
||||
params->converter->network()->addTopK(*tensor, op, k, reduce_axes);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
|
||||
nvinfer1::ITensor* output_value_tensor = layer->getOutput(0);
|
||||
nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1);
|
||||
@ -5589,6 +5676,7 @@ Status ConvertDepthSpaceShuffle(OpConverterParams* params) {
|
||||
nvinfer1::IShuffleLayer* first_shuffle =
|
||||
params->converter->network()->addShuffle(*inputs.at(0).tensor());
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(first_shuffle, node_def.name());
|
||||
SetLayerName(first_shuffle, node_def, "shuffle", /*op_instance=*/0);
|
||||
if (data_format == "NHWC") {
|
||||
first_shuffle->setFirstTranspose({2, 0, 1});
|
||||
}
|
||||
@ -5598,6 +5686,7 @@ Status ConvertDepthSpaceShuffle(OpConverterParams* params) {
|
||||
nvinfer1::IShuffleLayer* second_shuffle =
|
||||
params->converter->network()->addShuffle(*first_shuffle->getOutput(0));
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(second_shuffle, node_def.name());
|
||||
SetLayerName(second_shuffle, node_def, "shuffle", /*op_instance=*/1);
|
||||
second_shuffle->setReshapeDimensions(second_shuffle_shape);
|
||||
if (data_format == "NHWC") {
|
||||
second_shuffle->setSecondTranspose({1, 2, 0});
|
||||
@ -5625,9 +5714,11 @@ Status ConvertSquaredDifference(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* tensor_l = nullptr;
|
||||
nvinfer1::ITensor* tensor_r = nullptr;
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l));
|
||||
inputs.at(0), broadcasted_dims_l, params->validation_only, &tensor_l,
|
||||
node_def));
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r));
|
||||
inputs.at(1), broadcasted_dims_r, params->validation_only, &tensor_r,
|
||||
node_def));
|
||||
if (params->validation_only) return Status::OK();
|
||||
|
||||
// Subtract x - y.
|
||||
@ -5635,12 +5726,15 @@ Status ConvertSquaredDifference(OpConverterParams* params) {
|
||||
params->converter->network()->addElementWise(
|
||||
*tensor_l, *tensor_r, nvinfer1::ElementWiseOperation::kSUB);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(sub, node_def.name());
|
||||
SetLayerName(sub, node_def, "sub");
|
||||
|
||||
// Multiply (x - y) * (x - y).
|
||||
nvinfer1::IElementWiseLayer* mul =
|
||||
params->converter->network()->addElementWise(
|
||||
*sub->getOutput(0), *sub->getOutput(0),
|
||||
nvinfer1::ElementWiseOperation::kPROD);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(mul, node_def.name());
|
||||
SetLayerName(mul, node_def, "mul");
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(mul->getOutput(0)));
|
||||
return Status::OK();
|
||||
@ -5778,6 +5872,7 @@ Status ConvertCombinedNMS(OpConverterParams* params) {
|
||||
nvinfer1::IPluginV2Layer* layer = params->converter->network()->addPluginV2(
|
||||
&plugin_inputs[0], static_cast<int>(plugin_inputs.size()), *plugin);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, "plugin");
|
||||
|
||||
// Set plugin outputs
|
||||
nvinfer1::ITensor* output_nmsed_boxes = layer->getOutput(1);
|
||||
@ -5791,8 +5886,8 @@ Status ConvertCombinedNMS(OpConverterParams* params) {
|
||||
nvinfer1::ITensor* output_nmsed_scores = nullptr;
|
||||
nvinfer1::ITensor* output_nmsed_classes = nullptr;
|
||||
|
||||
auto shrink_last_dim = [params](nvinfer1::ITensor* in_tensor,
|
||||
nvinfer1::ITensor** out_tensor) {
|
||||
auto shrink_last_dim = [&](int output_index, nvinfer1::ITensor** out_tensor) {
|
||||
nvinfer1::ITensor* in_tensor = layer->getOutput(output_index);
|
||||
nvinfer1::Dims dims = in_tensor->getDimensions();
|
||||
if (dims.d[dims.nbDims - 1] != 1) {
|
||||
return errors::Internal("Expect last dims to be 1, for tensor ",
|
||||
@ -5801,15 +5896,12 @@ Status ConvertCombinedNMS(OpConverterParams* params) {
|
||||
--dims.nbDims;
|
||||
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
|
||||
TRT_TensorOrWeights(in_tensor), dims,
|
||||
/*validation_only=*/false, out_tensor));
|
||||
/*validation_only=*/false, out_tensor, node_def, output_index));
|
||||
return Status::OK();
|
||||
};
|
||||
TF_RETURN_IF_ERROR(
|
||||
shrink_last_dim(layer->getOutput(2), &output_nmsed_scores));
|
||||
TF_RETURN_IF_ERROR(
|
||||
shrink_last_dim(layer->getOutput(3), &output_nmsed_classes));
|
||||
TF_RETURN_IF_ERROR(
|
||||
shrink_last_dim(layer->getOutput(0), &output_num_detections));
|
||||
TF_RETURN_IF_ERROR(shrink_last_dim(2, &output_nmsed_scores));
|
||||
TF_RETURN_IF_ERROR(shrink_last_dim(3, &output_nmsed_classes));
|
||||
TF_RETURN_IF_ERROR(shrink_last_dim(0, &output_num_detections));
|
||||
#endif // IS_TRT_VERSION_GE(6, 0, 0, 0)
|
||||
|
||||
params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_boxes));
|
||||
@ -5887,6 +5979,7 @@ Status ConvertResize(OpConverterParams* params) {
|
||||
nvinfer1::IResizeLayer* layer =
|
||||
params->converter->network()->addResize(*tensor);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def);
|
||||
|
||||
// Set layer parameters.
|
||||
layer->setResizeMode(resize_mode);
|
||||
@ -5946,6 +6039,7 @@ Status ConvertAddN(OpConverterParams* params) {
|
||||
nvinfer1::ILayer* layer = params->converter->network()->addElementWise(
|
||||
*lhs, *rhs, nvinfer1::ElementWiseOperation::kSUM);
|
||||
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
|
||||
SetLayerName(layer, node_def, std::to_string(i));
|
||||
lhs = layer->getOutput(0);
|
||||
}
|
||||
params->outputs->push_back(TRT_TensorOrWeights(lhs));
|
||||
@ -6068,6 +6162,8 @@ Status ConvertGraphDefToEngine(
|
||||
|
||||
VLOG(1) << "Starting to convert TensorFlow ops to TensorRT layers";
|
||||
std::vector<Converter::EngineOutputInfo> output_tensors;
|
||||
int num_layers = converter->network()->getNbLayers();
|
||||
// Names of all layers created so far, stored by value. Hashing the raw
// `const char*` returned by ILayer::getName() would compare pointer addresses
// rather than the name text, so two distinct layers carrying the same name
// would never be reported as a collision.
absl::flat_hash_set<std::string> layer_names;
|
||||
// Graph nodes are already topologically sorted during construction
|
||||
for (const auto& node_def : gdef.node()) {
|
||||
const string& node_name = node_def.name();
|
||||
@ -6146,6 +6242,25 @@ Status ConvertGraphDefToEngine(
|
||||
} else {
|
||||
TF_RETURN_IF_ERROR(converter->ConvertNode(node_def));
|
||||
}
|
||||
|
||||
// To support TF-TRT profiling, we ensure each ILayer has a non-empty name.
|
||||
// BuildCudaEngine returns an error if there is any ILayer name collision.
|
||||
// We want to report the error here before BuildCudaEngine in a more
|
||||
// meaningful way.
|
||||
int new_num_layers = converter->network()->getNbLayers();
|
||||
for (int i = num_layers; i < new_num_layers; i++) {
|
||||
auto layer = converter->network()->getLayer(i);
|
||||
if (layer->getName() == nullptr ||
|
||||
!layer_names.insert(layer->getName()).second) {
|
||||
// Build the message once so the logged warning and the returned status
// carry the same text. Each suffix starts with a space so it does not run
// into the op type printed just before it.
std::string error_message =
    absl::StrCat("Converting node ", node_name, ", op=", node_def.op(),
                 layer->getName() ? " creates a layer with name collision"
                                  : " creates a layer without a name");
|
||||
LOG_WARNING_WITH_PREFIX << error_message;
|
||||
return errors::Internal(error_message);
|
||||
}
|
||||
}
|
||||
num_layers = new_num_layers;
|
||||
}
|
||||
TF_RETURN_IF_ERROR(converter->RenameAndMarkOutputTensors(output_tensors));
|
||||
if (convert_successfully) *convert_successfully = true;
|
||||
|
@ -22,6 +22,7 @@ limitations under the License.
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
|
||||
#include "tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h"
|
||||
#include "tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h"
|
||||
@ -525,8 +526,8 @@ class Converter {
|
||||
const NodeDef& node_def,
|
||||
absl::string_view sub_op_name = "");
|
||||
|
||||
// Converts 'input' into 'tensor' with shape specified by 'dims' (which
|
||||
// doesn't contain the batch dimension).
|
||||
// Converts 'input' of 'node_def' into 'tensor' with shape specified by 'dims'
|
||||
// (which doesn't contain the batch dimension).
|
||||
//
|
||||
// If validation_only is true, it doesn't do the conversion but only does some
|
||||
// minimum validation for the eligibility of the conversion, and *tensor will
|
||||
@ -534,7 +535,9 @@ class Converter {
|
||||
Status PrepareTensorForShape(const TRT_TensorOrWeights& input,
|
||||
const nvinfer1::Dims& dims,
|
||||
const bool validation_only,
|
||||
nvinfer1::ITensor** tensor);
|
||||
nvinfer1::ITensor** tensor,
|
||||
const NodeDef& node_def,
|
||||
absl::optional<int> op_instance = absl::nullopt);
|
||||
|
||||
// Reshapes a dynamic shape tensor by removing or adding dimensions of size 1,
|
||||
// and/or permuting the dimensions. The new shape is derived from the shape of
|
||||
@ -579,12 +582,14 @@ class Converter {
|
||||
Status DynamicReshape(nvinfer1::ITensor* input,
|
||||
std::vector<std::pair<int, int>> slices,
|
||||
OpConverterParams* params, nvinfer1::ITensor** output,
|
||||
std::vector<int> size_for_added_dims = {});
|
||||
std::vector<int> size_for_added_dims = {},
|
||||
absl::optional<int> op_instance = absl::nullopt);
|
||||
|
||||
// Inserts a singleton dimension at axis for a dynamic shape tensor.
|
||||
Status DynamicExpandDims(nvinfer1::ITensor* input, const nvinfer1::Dims& dims,
|
||||
int axis, OpConverterParams* params,
|
||||
nvinfer1::ITensor** output);
|
||||
nvinfer1::ITensor** output,
|
||||
absl::optional<int> op_instance = absl::nullopt);
|
||||
|
||||
// Helper function to add a squeeze op to the network.
|
||||
//
|
||||
@ -671,6 +676,10 @@ class Converter {
|
||||
// acceptable by TRT.
|
||||
int batch_size_ = -1;
|
||||
|
||||
// Assign an ID to each constant layer we create, so that we can assign a
|
||||
// unique name to the layer.
|
||||
int next_constant_layer_id_ = 0;
|
||||
|
||||
friend class ConverterTest;
|
||||
friend class OpConverterTest;
|
||||
};
|
||||
|
@ -214,6 +214,13 @@ void ExpectTrtLayerNames(absl::Span<const std::string> names,
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that every layer in `network` carries a usable name: the name pointer
// must be non-null and the string it points to must be non-empty. Violations
// are reported as non-fatal gtest failures tagged with the layer index, so a
// single run lists every offending layer.
void VerifyTrtLayerNameNotEmpty(nvinfer1::INetworkDefinition* network) {
  const int num_layers = network->getNbLayers();
  for (int i = 0; i < num_layers; ++i) {
    const char* name = network->getLayer(i)->getName();
    EXPECT_NE(name, nullptr) << "Layer " << i << " has a null name";
    // A null check alone would let an empty string through, so also test the
    // contents (guarded so a null pointer is never dereferenced).
    if (name != nullptr) {
      EXPECT_STRNE(name, "") << "Layer " << i << " has an empty name";
    }
  }
}
|
||||
|
||||
Matcher<std::vector<float>> ArrayFloatNear(const std::vector<float>& values,
|
||||
float max_abs_error = 1e-5,
|
||||
bool nan_sensitive = false) {
|
||||
@ -814,6 +821,8 @@ TEST_F(ConverterTest, ConvertNode) {
|
||||
TF_EXPECT_OK(GetTensorOrWeights("my_op:1", &actual_output_2));
|
||||
EXPECT_EQ(&output_tensors[1], actual_output_2.tensor());
|
||||
EXPECT_EQ(125, actual_output_2.tensor()->getDimensions().d[0]);
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, AddAndGetInputs) {
|
||||
@ -843,6 +852,8 @@ TEST_F(ConverterTest, AddAndGetInputs) {
|
||||
ExpectTrtDimsEqualsArray({1}, inputs[0].tensor()->getDimensions());
|
||||
ExpectTrtDimsEqualsArray({2, 3}, inputs[2].tensor()->getDimensions());
|
||||
ExpectTrtDimsEqualsArray({5, 3}, inputs[3].tensor()->getDimensions());
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, RenameAndMarkOutputTensors) {
|
||||
@ -891,6 +902,8 @@ TEST_F(ConverterTest, RenameAndMarkOutputTensors) {
|
||||
}
|
||||
EXPECT_EQ("my_output", string(output_tensors[0]->getName()));
|
||||
EXPECT_EQ("my_output_1", string(output_tensors[1]->getName()));
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, TransposeTensor) {
|
||||
@ -934,9 +947,11 @@ void TestPrepareTensorForShape(
|
||||
}
|
||||
nvinfer1::ITensor* output_tensor = nullptr;
|
||||
|
||||
NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
|
||||
for (bool validation_only : {false, true}) {
|
||||
const Status status = converter->PrepareTensorForShape(
|
||||
input, GetTestDims(reshape_dims), validation_only, &output_tensor);
|
||||
input, GetTestDims(reshape_dims), validation_only, &output_tensor,
|
||||
dummy_node_def);
|
||||
if (expected_code == error::OK) {
|
||||
TF_EXPECT_OK(status);
|
||||
if (validation_only) {
|
||||
@ -990,6 +1005,8 @@ TEST_F(ConverterTest, PrepareTensorForShape) {
|
||||
/*input_is_tensor=*/false, converter_.get(),
|
||||
weight_store_, error::INVALID_ARGUMENT,
|
||||
"Shape is not fully defined");
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, MaybeUpdateBatchSize) {
|
||||
@ -1063,6 +1080,8 @@ TEST_F(ConverterTest, ProvideQuantizationRange) {
|
||||
// Symmetric range
|
||||
converter_->ProvideQuantizationRange(&fake_tensor, -6.123f, 6.123f);
|
||||
EXPECT_EQ(6.123f, quantization_ranges()[&fake_tensor]);
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
|
||||
@ -1089,6 +1108,8 @@ TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
|
||||
EXPECT_EQ(infer_3.getDynamicRange(), 5.0f);
|
||||
EXPECT_EQ(not_infer.getDynamicRange(), 100.0f);
|
||||
#endif
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(int8_converter->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, PropagateQuantizationRanges) {
|
||||
@ -1111,6 +1132,8 @@ TEST_F(ConverterTest, PropagateQuantizationRanges) {
|
||||
EXPECT_EQ(5.0f, ranges[&infer[i]]);
|
||||
}
|
||||
// `not_infer` must not have been given a quantization range.
EXPECT_EQ(ranges.count(&not_infer), 0);
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, GetTrtBroadcastShape) {
|
||||
@ -1214,6 +1237,8 @@ TEST_F(ConverterTest, GetTrtBroadcastShape) {
|
||||
"(tensor #dims 4 vs broadcast #dims 5)");
|
||||
symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
|
||||
error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
TEST_F(ConverterTest, CreateConstantLayer) {
|
||||
@ -1228,6 +1253,8 @@ TEST_F(ConverterTest, CreateConstantLayer) {
|
||||
<< DebugString(tensor->getType());
|
||||
ExpectTrtDimsEqualsArray({3, 10}, tensor->getDimensions());
|
||||
}
|
||||
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
|
||||
class ConvertGraphDefToEngineTest : public ::testing::Test {
|
||||
@ -1587,6 +1614,9 @@ class OpConverterTest : public ::testing::Test {
|
||||
const char* expected_msg_substr = nullptr) {
|
||||
ExpectStatus(converter_->ConvertNode(node->def()), expected_code,
|
||||
expected_msg_substr);
|
||||
if (expected_code == error::OK) {
|
||||
VerifyTrtLayerNameNotEmpty(converter_->network());
|
||||
}
|
||||
}
|
||||
|
||||
// Helper method to run both validation and conversion, when the expected
|
||||
|
Loading…
Reference in New Issue
Block a user