Merge pull request #28805 from trevor-m:develop-pranavm-mm-changes

PiperOrigin-RevId: 249724198
TensorFlower Gardener 2019-05-23 15:15:57 -07:00
commit 07ade963a5
3 changed files with 157 additions and 570 deletions


@@ -385,11 +385,10 @@ string DebugString(const nvinfer1::ITensor& tensor) {
", dims=", DebugString(tensor.getDimensions()), ")");
}
Status Converter::GetTrtBroadcastShape(
const TRT_TensorOrWeights& operand_l, const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims) const {
// ***************************************************************************
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims) {
// TensorRT Elementwise op supports broadcast but requires both tensors to be
// of identical rank
//
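// For illustration (a sketch using only names from this diff; the exact
// expectations live in the converter tests later in this change):
//   nvinfer1::Dims l_new_dims, r_new_dims;
//   TF_RETURN_IF_ERROR(GetTrtBroadcastShape(operand_l, operand_r,
//                                           &l_new_dims, &r_new_dims));
//   // Tensor dims {1, 3, 4} vs {2, 1, 4} are compatible and stay unchanged;
//   // dims {2, 3} vs {7, 5} fail with "Infeasible broadcast scheme".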
@@ -1676,190 +1675,6 @@ Status UnaryCompute(const TRT_ShapedWeights& iweights,
return Status::OK();
}
// If swapped_inputs is false, 'tensor' is the left operand and 'weights' is the
// right operand. If swapped_inputs is true, those two are swapped.
//
// TODO(jie): broadcast is needed but not yet implemented.
// Only channel-wise support is implemented for the time being.
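// For example, with the Sub handling below: swapped_inputs == false computes
// tensor - weights, while swapped_inputs == true computes weights - tensor,
// implemented as (-tensor) + weights via a kNEG unary plus a shift.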
Status BinaryTensorOpWeight(OpConverterParams* params,
nvinfer1::ITensor* tensor,
TRT_ShapedWeights weights, bool swapped_inputs) {
static const std::unordered_set<string> supported_ops = {"Sub", "Add", "Mul",
"Div", "RealDiv"};
const auto& node_def = params->node_def;
if (!supported_ops.count(node_def.op())) {
return errors::Unimplemented(node_def.op(), " is not supported, at ",
node_def.name());
}
// Check scale mode.
auto dims_w = weights.shape_;
const auto dims_t = tensor->getDimensions();
// TODO(jie): addScale checks for input tensor dimension
if (dims_t.nbDims != 3) {
return errors::InvalidArgument("addScale requires tensor with rank 3, at ",
node_def.name());
}
// Default to element-wise
auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
// TODO(jie): maybe use a permutation instead to support more cases.
bool need_to_permute = false;
if (weights.count() == 1) {
scale_mode = nvinfer1::ScaleMode::kUNIFORM;
} else {
VLOG(2) << "weights dims: " << DebugString(dims_w)
<< "; tensor dims: " << DebugString(dims_t);
// Make sure no broadcasting on batch dimension.
if (dims_w.nbDims == dims_t.nbDims + 1) {
if (dims_w.d[0] == 1) {
for (int i = 1; i < dims_w.nbDims; i++) {
dims_w.d[i - 1] = dims_w.d[i];
}
dims_w.nbDims--;
} else {
return errors::InvalidArgument("Binary op cannot operate on batch, at ",
node_def.name());
}
}
if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) {
scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
// Default is element-wise
for (int i = 1; i < dims_w.nbDims; i++) {
if (dims_w.d[i] != dims_t.d[i]) {
// If dimension does not match, switch back to per-channel
scale_mode = nvinfer1::ScaleMode::kCHANNEL;
break;
}
}
// If the mode is per-channel, since channel dimension is assumed to be
// the third to last dimension, we need to make sure all other dimensions
// have size 1.
if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
for (int i = 1; i < dims_w.nbDims; i++) {
if (dims_w.d[i] != 1)
return errors::InvalidArgument(
"Weight dims not compatible for channel-wise broadcast at ",
node_def.name());
}
}
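// For example, tensor dims {2, 1, 2} with weights dims {2, 1, 1} pass this
// check and use kCHANNEL, while tensor dims {1, 1, 1} with weights dims
// {1, 2, 1} fail it (see the per-channel fallback test in this change).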
} else if (dims_w.nbDims == 1 &&
dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) {
// Channel-wise mode with broadcast is required. We compare the last
// dimension of the tensor shape because of TensorFlow's default
// broadcasting rules.
need_to_permute = true;
scale_mode = nvinfer1::ScaleMode::kCHANNEL;
} else {
return errors::InvalidArgument("Weight dims not compatible at ",
node_def.name());
}
}
// TODO(laigd): we should add validation_only support in TransposeTensor() and
// PrepareTensorForShape().
if (params->validation_only) return Status::OK();
// Transpose last dimension.
std::vector<int> permutation(dims_t.nbDims + 1);
if (need_to_permute) {
// We swap the last dimension into the channel dimension for TRT, because
// of TensorFlow's default broadcasting rules.
for (int i = 0; i < static_cast<int>(permutation.size()); i++) {
permutation[i] = i;
}
permutation[1] = dims_t.nbDims;
permutation[dims_t.nbDims] = 1;
TF_RETURN_IF_ERROR(
params->converter->TransposeTensor(tensor, permutation, &tensor));
}
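// For example, with dims_t.nbDims == 3 the permutation built above is
// {0, 3, 2, 1}: the identity with positions 1 and 3 swapped, so the last
// dimension lands in the channel slot expected by kCHANNEL mode.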
// Prepare weights
TRT_ShapedWeights shift_weights(weights.TrtDType());
TRT_ShapedWeights scale_weights(weights.TrtDType());
TRT_ShapedWeights power_weights(weights.TrtDType());
if (node_def.op() == "Sub") {
if (swapped_inputs) {
shift_weights = weights;
nvinfer1::IUnaryLayer* layer = params->converter->network()->addUnary(
*tensor, nvinfer1::UnaryOperation::kNEG);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
// Since quantization ranges are symmetric, the same range as the input
// will work for the negation of the input.
params->converter->MarkQuantizationRangesAsInferrable(
tensor, layer->getOutput(0));
tensor = layer->getOutput(0);
} else {
TRT_ShapedWeights neg_weights =
params->weight_store->GetTempWeights(weights);
LambdaFactory unary_op;
unary_op.op = LambdaFactory::OP_CATEGORY::NEG;
TF_RETURN_IF_ERROR(UnaryCompute(weights, &neg_weights, unary_op));
shift_weights = neg_weights;
}
} else if (node_def.op() == "Div" || node_def.op() == "RealDiv") {
if (swapped_inputs) {
// We need to infer the quantization range for this intermediate tensor.
//
// x -> [Recip] -> 1/x -> [Scale] -> s/x
//                  ^
//            need range for this
//
// We have the quantization scales for x and s/x - can we divide the scale
// for s/x by s? Only if it is a scalar.
//
// Because of this issue, fall back to BinaryTensorOpTensor if we are
// doing INT8 with no calibration. There is most likely no performance
// penalty by falling back here.
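// For example, if x has the symmetric range [-8, 8], 1/x is unbounded as
// x approaches zero, so no usable range for the kRECIP output can be
// derived from the input range alone.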
if (params->converter->precision_mode() == TrtPrecisionMode::INT8 &&
!params->converter->use_calibration()) {
return errors::Unimplemented(
"Intermediate quantization range cannot be determined without"
" calibration. Falling back to BinaryTensorOpTensor for ",
node_def.op(), ", at ", node_def.name());
}
scale_weights = weights;
nvinfer1::IUnaryLayer* layer = params->converter->network()->addUnary(
*tensor, nvinfer1::UnaryOperation::kRECIP);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
tensor = layer->getOutput(0);
} else {
TRT_ShapedWeights recip_weights =
params->weight_store->GetTempWeights(weights);
LambdaFactory unary_op;
unary_op.op = LambdaFactory::OP_CATEGORY::RECIP;
TF_RETURN_IF_ERROR(UnaryCompute(weights, &recip_weights, unary_op));
scale_weights = recip_weights;
}
} else if (node_def.op() == "Mul") {
scale_weights = weights;
} else if (node_def.op() == "Add") {
shift_weights = weights;
} else {
// This should not happen.
return errors::Unimplemented("Binary op not supported at ", node_def.op());
}
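// At this point the op has been lowered onto addScale's inputs: Add/Sub
// populate shift_weights (Sub negates one side), Mul/Div/RealDiv populate
// scale_weights (division via reciprocal), and power_weights stays empty so
// no exponent is applied.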
nvinfer1::IScaleLayer* layer = params->converter->network()->addScale(
*tensor, scale_mode, shift_weights.GetTrtWeights(),
scale_weights.GetTrtWeights(), power_weights.GetTrtWeights());
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
// Transpose the dimensions back.
if (need_to_permute) {
TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
output_tensor, permutation, &output_tensor));
}
// Pass the output
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
return Status::OK();
}
Status ConvertConv2DHelper(OpConverterParams* params, int group,
bool is_conv2d_backprop_input) {
const auto& inputs = params->inputs;
@@ -2047,74 +1862,6 @@ Status ConvertConv2DHelper(OpConverterParams* params, int group,
return Status::OK();
}
Status BinaryTensorOpTensor(OpConverterParams* params,
const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r) {
const auto& node_def = params->node_def;
static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{
{"Add", nvinfer1::ElementWiseOperation::kSUM},
{"Mul", nvinfer1::ElementWiseOperation::kPROD},
{"Sub", nvinfer1::ElementWiseOperation::kSUB},
{"Div", nvinfer1::ElementWiseOperation::kDIV},
{"RealDiv", nvinfer1::ElementWiseOperation::kDIV},
{"Minimum", nvinfer1::ElementWiseOperation::kMIN},
{"Maximum", nvinfer1::ElementWiseOperation::kMAX},
{"Pow", nvinfer1::ElementWiseOperation::kPOW},
};
auto op_pair = ops.find(node_def.op());
if (op_pair == ops.end()) {
return errors::Unimplemented("Binary op ", node_def.op(),
" not supported at: ", node_def.name());
}
nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
Status status = params->converter->GetTrtBroadcastShape(
operand_l, operand_r, &broadcasted_dims_l, &broadcasted_dims_r);
if (!status.ok()) {
return errors::InvalidArgument(
"Unsupported binary op broadcast scheme for op ", node_def.name(), ": ",
status.error_message());
}
TFAttrs attrs(node_def);
nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T");
if (dtype == nvinfer1::DataType::kINT32) {
return errors::Unimplemented("Binary op ", node_def.op(),
" does not support INT32, at ",
node_def.name());
}
if (params->validation_only) return Status::OK();
nvinfer1::ITensor* tensor_l = nullptr;
nvinfer1::ITensor* tensor_r = nullptr;
status = params->converter->PrepareTensorForShape(
operand_l, broadcasted_dims_l, /*validation_only=*/false, &tensor_l);
if (status.ok()) {
status = params->converter->PrepareTensorForShape(
operand_r, broadcasted_dims_r, /*validation_only=*/false, &tensor_r);
}
if (!status.ok()) {
return errors::Internal("Failed to convert binary op ", node_def.name(),
": ", status.error_message());
}
// Check type consistency.
TFTRT_CHECK_EQ_TYPE(tensor_l->getType(), dtype)
<< DebugString(tensor_l->getType()) << " vs " << DebugString(dtype);
TFTRT_CHECK_EQ_TYPE(tensor_r->getType(), dtype)
<< DebugString(tensor_r->getType()) << " vs " << DebugString(dtype);
// Add ElementWise layer.
nvinfer1::IElementWiseLayer* layer =
params->converter->network()->addElementWise(*tensor_l, *tensor_r,
op_pair->second);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
// Pass the output
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
return Status::OK();
}
Status ConvertPlugin(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
@@ -3335,9 +3082,6 @@ Status ConvertIdentity(OpConverterParams* params) {
Status ConvertBinary(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
// TODO(tmorris): Enable once false is updated to mean either tensor or weight
// TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}, {"y",
// false}}));
if (inputs.size() != 2) {
return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
" inputs but expected 2, at ",
@@ -3353,33 +3097,45 @@ Status ConvertBinary(OpConverterParams* params) {
"both input as constant at: ",
node_def.name());
}
const TRT_TensorOrWeights& operand_l = inputs.at(0);
const TRT_TensorOrWeights& operand_r = inputs.at(1);
// TODO(tmorris): TRT plans to deprecate IScaleLayer and will replace it with
// IElementwiseLayer. At that point, we can remove BinaryTensorOpWeight. For
// now, the performance will be slightly better with IScaleLayer because it
// can be fused in more situations. However, most of the benefits of
// IScaleLayer are when the layer performs both a shift and a scale, which we
// don't do except for convolutions.
//
// Try to convert into a Scale layer first (for better performance).
// Since the scale layer supports only a restricted broadcast policy and set
// of op types, we allow failure and fall back to the Elementwise op
// (BinaryTensorOpTensor).
Status status = Status::OK();
if (inputs.at(0).is_tensor() && inputs.at(1).is_weights()) {
status = BinaryTensorOpWeight(params, inputs.at(0).tensor(),
inputs.at(1).weights(), false);
} else if (inputs.at(0).is_weights() && inputs.at(1).is_tensor()) {
status = BinaryTensorOpWeight(params, inputs.at(1).tensor(),
inputs.at(0).weights(), true);
static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{
{"Add", nvinfer1::ElementWiseOperation::kSUM},
{"Mul", nvinfer1::ElementWiseOperation::kPROD},
{"Sub", nvinfer1::ElementWiseOperation::kSUB},
{"Div", nvinfer1::ElementWiseOperation::kDIV},
{"RealDiv", nvinfer1::ElementWiseOperation::kDIV},
{"Minimum", nvinfer1::ElementWiseOperation::kMIN},
{"Maximum", nvinfer1::ElementWiseOperation::kMAX},
{"Pow", nvinfer1::ElementWiseOperation::kPOW},
};
auto op_pair = ops.find(node_def.op());
if (op_pair == ops.end()) {
return errors::Unimplemented("Binary op ", node_def.op(),
" not supported at: ", node_def.name());
}
// If both inputs are tensors, or one of them is weights but the conversion
// above failed, try the conversion using BinaryTensorOpTensor.
if ((inputs.at(0).is_tensor() && inputs.at(1).is_tensor()) || !status.ok()) {
if (!status.ok()) VLOG(2) << status;
status = BinaryTensorOpTensor(params, inputs.at(0), inputs.at(1));
}
return status;
nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
operand_l, operand_r, &broadcasted_dims_l, &broadcasted_dims_r));
nvinfer1::ITensor* tensor_l = nullptr;
nvinfer1::ITensor* tensor_r = nullptr;
// This will also convert constants to tensors, and set quantization ranges.
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
operand_l, broadcasted_dims_l, params->validation_only, &tensor_l));
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
operand_r, broadcasted_dims_r, params->validation_only, &tensor_r));
if (params->validation_only) return Status::OK();
// Add ElementWise layer.
nvinfer1::IElementWiseLayer* layer =
params->converter->network()->addElementWise(*tensor_l, *tensor_r,
op_pair->second);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
return Status::OK();
}
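// Note: the rewritten ConvertBinary above now follows a single path for every
// tensor/weights combination: look up the TRT elementwise op, compute the
// broadcast shapes with GetTrtBroadcastShape, materialize both operands with
// PrepareTensorForShape (which also turns weights into constant layers and
// sets quantization ranges), and add one IElementWiseLayer.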
Status ConvertRsqrt(OpConverterParams* params) {
@@ -4532,7 +4288,7 @@ Status ConvertSquaredDifference(OpConverterParams* params) {
const auto& node_def = params->node_def;
// Broadcast inputs.
nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
TF_RETURN_IF_ERROR(params->converter->GetTrtBroadcastShape(
TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
inputs.at(0), inputs.at(1), &broadcasted_dims_l, &broadcasted_dims_r));
nvinfer1::ITensor* tensor_l = nullptr;
nvinfer1::ITensor* tensor_r = nullptr;


@@ -512,13 +512,6 @@ class Converter {
const bool validation_only,
nvinfer1::ITensor** tensor);
// Return OK if the broadcast scheme is supported and compute the shapes after
// broadcasting.
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims) const;
// Creates an IConstantLayer using 'weights' whose dimensions are specified by
// 'dims', and returns the output ITensor.
nvinfer1::ITensor* CreateConstantLayer(const TRT_ShapedWeights& weights,
@@ -592,6 +585,13 @@ class Converter {
friend class OpConverterTest;
};
// Return OK if the broadcast scheme is supported and compute the shapes after
// broadcasting.
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims);
// Map of all supported UnaryOperations
const std::unordered_map<string, nvinfer1::UnaryOperation>* UnaryOperationMap();
// Map of all supported ActivationTypes


@@ -988,19 +988,17 @@ TEST_F(ConverterTest, GetTrtBroadcastShape) {
operand_2_shape, operand_2_is_tensor, operand_2_batch_size);
// operand_1 broadcast operand_2
ExpectStatus(
this->converter_->GetTrtBroadcastShape(
operand_1, operand_2, &operand_1_new_dims, &operand_2_new_dims),
expected_code, expected_error_msg_substr);
ExpectStatus(GetTrtBroadcastShape(operand_1, operand_2, &operand_1_new_dims,
&operand_2_new_dims),
expected_code, expected_error_msg_substr);
if (expected_code == error::OK) {
ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
}
// operand_2 broadcast operand_1
ExpectStatus(
this->converter_->GetTrtBroadcastShape(
operand_2, operand_1, &operand_2_new_dims, &operand_1_new_dims),
expected_code, expected_error_msg_substr);
ExpectStatus(GetTrtBroadcastShape(operand_2, operand_1, &operand_2_new_dims,
&operand_1_new_dims),
expected_code, expected_error_msg_substr);
if (expected_code == error::OK) {
ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
@@ -1033,18 +1031,29 @@ TEST_F(ConverterTest, GetTrtBroadcastShape) {
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 4 vs broadcast #dims 5)");
symmetric_test({3}, {1, 1, 3}, kIsTensor, kIsNotTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 2 vs broadcast #dims 3)",
/*operand_1_batch_size=*/2);
// Both inputs are tensors.
symmetric_test({1, 1, 1}, {1, 1}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 3 vs broadcast #dims 4)");
symmetric_test({1, 3}, {3}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 2 vs broadcast #dims 3)");
symmetric_test({1, 3, 4}, {2, 1, 4}, kIsTensor, kIsTensor, {1, 3, 4},
{2, 1, 4});
symmetric_test({1, 1, 1}, {1, 1, 1, 1}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 4 vs broadcast #dims 5)");
symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
}
TEST_F(ConverterTest, CreateConstantLayer) {
@@ -2007,250 +2016,82 @@ void CheckAddedLayers(OpConverterTest* test, bool expect_scale_layer) {
}
template <typename OpType, DataType dtype>
void TestBinaryTensorOpWeightNoBroadcast(OpConverterTest* test) {
typedef typename EnumToDataType<dtype>::Type CType;
for (auto swap_inputs : {false, true}) {
test->Reset();
NodeDef node_def;
if (swap_inputs) {
node_def = GetBinaryOpNodeDef<OpType>("weights", "input", dtype);
} else {
node_def = GetBinaryOpNodeDef<OpType>("input", "weights", dtype);
}
const std::vector<CType> operand1{CType(3), CType(7.5)};
const std::vector<CType> operand2{CType(2), CType(3)};
// Applying an IScaleLayer requires the dims to be at least rank 3.
test->AddTestTensor("input", /*dims=*/{1, 1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>("weights", /*dims=*/{1, 1, 2},
/*values=*/swap_inputs ? operand1 : operand2);
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpWeight, not BinaryTensorOpTensor.
CheckAddedLayers(test, /*expect_scale_layer=*/true);
// Check the dims of the output ITensor.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({1, 1, 2}, output.tensor()->getDimensions());
const DataVec input_data{
{"input", test::AsTensor<CType>(swap_inputs ? operand2 : operand1)}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(2)}};
test->BuildAndRun(
input_data, &output_data,
dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32);
if (node_def.op() == "Add") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(5), CType(10.5)));
} else if (node_def.op() == "Sub") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1), CType(4.5)));
} else if (node_def.op() == "Mul") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(6), CType(22.5)));
} else if (node_def.op() == "Div") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(2.5)));
} else if (node_def.op() == "RealDiv") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(2.5)));
} else {
ASSERT_TRUE(false);
}
}
}
template <DataType dtype>
void TestBinaryTensorOpWeightWithChannelWiseBroadcast(OpConverterTest* test) {
typedef typename EnumToDataType<dtype>::Type CType;
const NodeDef node_def =
GetBinaryOpNodeDef<ops::Add>("input", "weights", dtype);
const std::vector<CType> input{CType(1), CType(2), CType(3), CType(4)};
const std::vector<CType> weights{CType(10), CType(20)};
// There are two types of valid dim pairs which require channel-wise
// broadcasting:
// - input dims (X Y Z) vs weights dims (X 1 1)
// - input dims (X Y Z) vs weights dims (Z)
// Here X=Z=2 and Y=1.
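// With input values {1, 2, 3, 4} this yields (matching the assertions below):
//   weights dims {2}:       broadcast along last axis -> {11, 22, 13, 24}
//   weights dims {2, 1, 1}: broadcast per channel     -> {11, 12, 23, 24}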
for (auto weights_dims : std::vector<std::vector<int>>{{2, 1, 1}, {2}}) {
test->Reset();
test->AddTestTensor("input", /*dims=*/{2, 1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>("weights", weights_dims, weights);
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpWeight, not BinaryTensorOpTensor.
CheckAddedLayers(test, /*expect_scale_layer=*/true);
// Check the dims of the output ITensor.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({2, 1, 2}, output.tensor()->getDimensions());
const DataVec input_data{{"input", test::AsTensor<CType>(input)}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(4)}};
test->BuildAndRun(input_data, &output_data);
if (weights_dims.size() == 1) {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(11), CType(22), CType(13), CType(24)));
} else {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(11), CType(12), CType(23), CType(24)));
}
}
}
template <DataType dtype>
void TestBinaryTensorOpWeightWithUniformlyBroadcast(OpConverterTest* test) {
typedef typename EnumToDataType<dtype>::Type CType;
const NodeDef node_def =
GetBinaryOpNodeDef<ops::Add>("input", "weights", dtype);
const std::vector<CType> input{CType(1), CType(2), CType(3), CType(4)};
const std::vector<CType> weights{CType(10)};
test->Reset();
test->AddTestTensor("input", /*dims=*/{2, 1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>("weights", {1, 1, 1, 1}, weights);
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpWeight, not BinaryTensorOpTensor.
CheckAddedLayers(test, /*expect_scale_layer=*/true);
// Check the dims of the output ITensor.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({2, 1, 2}, output.tensor()->getDimensions());
const DataVec input_data{{"input", test::AsTensor<CType>(input)}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(4)}};
test->BuildAndRun(input_data, &output_data);
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(11), CType(12), CType(13), CType(14)));
}
template <typename OpType>
void TestBinaryTensorOpWeightFallback(OpConverterTest* test,
const std::vector<int32>& input_dims,
const std::vector<int>& weights_dims,
error::Code code = error::OK,
const char* error_msg_substr = nullptr,
const int input_batch_size = 1) {
const DataType dtype = DT_FLOAT;
typedef typename EnumToDataType<dtype>::Type CType;
const size_t num_inputs = TrtTensorDimsNumElements(GetTestDims(input_dims));
const size_t num_weights =
TrtWeightDimsNumElements(GetTestDims(weights_dims));
test->Reset();
const NodeDef node_def =
GetBinaryOpNodeDef<OpType>("input", "weights", dtype);
test->AddTestTensor("input", /*dims=*/input_dims, input_batch_size,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>(
"weights", /*dims=*/weights_dims,
/*values=*/std::vector<CType>(num_weights, CType(1)));
test->RunValidationAndConversion(node_def, code, error_msg_substr);
if (code != error::OK) return;
// Make sure it does use BinaryTensorOpTensor, not BinaryTensorOpWeight.
CheckAddedLayers(test, /*expect_scale_layer=*/false);
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
// Check the dims of the output ITensor.
std::vector<int> expected_output_dims = input_dims;
for (int i = expected_output_dims.size() - 1, j = weights_dims.size() - 1;
i >= 0 && j >= 0; --i, --j) {
if (expected_output_dims[i] == 1) {
expected_output_dims[i] = weights_dims[j];
}
}
ExpectTrtDimsEqualsArray(expected_output_dims,
output.tensor()->getDimensions());
// Check the result of running the engine.
const int expected_num_outputs =
TrtTensorDimsNumElements(GetTestDims(expected_output_dims));
const DataVec input_data{
{"input", ConstructTensor<CType>(num_inputs, CType(2))}};
DataVec output_data{
{"my_binary", ConstructTensor<CType>(expected_num_outputs)}};
test->BuildAndRun(input_data, &output_data);
if (node_def.op() == "Add") {
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(std::vector<CType>(expected_num_outputs, CType(3))));
} else if (node_def.op() == "Minimum") {
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(std::vector<CType>(expected_num_outputs, CType(1))));
} else {
ASSERT_TRUE(false);
}
}
template <typename OpType, DataType dtype>
void TestBinaryTensorOpTensor(OpConverterTest* test) {
void TestBinaryOp(OpConverterTest* test, bool operand_1_is_tensor,
bool operand_2_is_tensor) {
typedef typename EnumToDataType<dtype>::Type CType;
test->Reset();
const NodeDef node_def =
GetBinaryOpNodeDef<OpType>("input1", "input2", dtype);
test->AddTestTensor("input1", /*dims=*/{1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestTensor("input2", /*dims=*/{2, 1}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
if (operand_1_is_tensor) {
test->AddTestTensor("input1", /*dims=*/{1, 2}, /*batch_size=*/2,
TfDataTypeToTrt(dtype));
} else {
test->AddTestWeights("input1", /*dims=*/{1, 2},
/*values=*/std::vector<CType>{CType(3), CType(6)});
}
if (operand_2_is_tensor) {
test->AddTestTensor("input2", /*dims=*/{2, 1}, /*batch_size=*/2,
TfDataTypeToTrt(dtype));
} else {
test->AddTestWeights("input2", /*dims=*/{2, 1},
/*values=*/std::vector<CType>{CType(2), CType(3)});
}
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpTensor, not BinaryTensorOpWeight.
CheckAddedLayers(test, /*expect_scale_layer=*/false);
DataVec input_data;
if (operand_1_is_tensor) {
input_data.push_back(
{"input1",
test::AsTensor<CType>({CType(3), CType(6), CType(3), CType(6)})});
}
if (operand_2_is_tensor) {
input_data.push_back(
{"input2",
test::AsTensor<CType>({CType(2), CType(3), CType(2), CType(3)})});
}
DataVec output_data{{"my_binary", ConstructTensor<CType>(8)}};
// Check output dims.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions());
const DataVec input_data{
{"input1", test::AsTensor<CType>({CType(3), CType(6)})},
{"input2", test::AsTensor<CType>({CType(2), CType(3)})}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(4)}};
// After broadcasting, the first input becomes {3, 6, 3, 6} and the second
// input becomes {2, 2, 3, 3}.
test->BuildAndRun(
input_data, &output_data,
dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32);
dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32,
/*batch_size=*/2);
if (node_def.op() == "Add") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(5), CType(8), CType(6), CType(9)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({5, 8, 6, 9, 5, 8, 6, 9})));
} else if (node_def.op() == "Sub") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1), CType(4), CType(0), CType(3)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({1, 4, 0, 3, 1, 4, 0, 3})));
} else if (node_def.op() == "Mul") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(6), CType(12), CType(9), CType(18)));
ElementsAreArray(
CastTestVector<int, CType>({6, 12, 9, 18, 6, 12, 9, 18})));
} else if (node_def.op() == "Div") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(3), CType(1), CType(2)));
ElementsAreArray(CastTestVector<float, CType>(
{1.5, 3, 1, 2, 1.5, 3, 1, 2})));
} else if (node_def.op() == "RealDiv") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(3), CType(1), CType(2)));
ElementsAreArray(CastTestVector<float, CType>(
{1.5, 3, 1, 2, 1.5, 3, 1, 2})));
} else if (node_def.op() == "Minimum") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(2), CType(2), CType(3), CType(3)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({2, 2, 3, 3, 2, 2, 3, 3})));
} else if (node_def.op() == "Maximum") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(3), CType(6), CType(3), CType(6)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({3, 6, 3, 6, 3, 6, 3, 6})));
} else if (node_def.op() == "Pow") {
ExpectArrayNear(
std::vector<CType>{CType(9), CType(36), CType(27), CType(216)},
CastTestVector<int, CType>({9, 36, 27, 216, 9, 36, 27, 216}),
GetSpanForData<CType>(output_data[0]));
} else {
ASSERT_TRUE(false);
@@ -2284,58 +2125,48 @@ TEST_F(OpConverterTest, ConvertBinary) {
"both input as constant at: my_add");
}
// Test BinaryTensorOpWeight() without broadcasting.
TestBinaryTensorOpWeightNoBroadcast<ops::Add, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Sub, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Mul, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Div, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::RealDiv, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Add, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Sub, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Mul, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Div, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::RealDiv, DT_HALF>(this);
// Test BinaryTensorOpWeight() with channel-wise broadcasting.
TestBinaryTensorOpWeightWithChannelWiseBroadcast<DT_FLOAT>(this);
// Test BinaryTensorOpWeight() with uniform broadcasting.
TestBinaryTensorOpWeightWithUniformlyBroadcast<DT_FLOAT>(this);
// Test BinaryTensorOpWeight() falling back to BinaryTensorOpTensor().
// Unsupported op.
TestBinaryTensorOpWeightFallback<ops::Minimum>(this, {1, 1, 1}, {1});
// Rank of input tensor < 3.
TestBinaryTensorOpWeightFallback<ops::Add>(this, {1, 1}, {1});
// Broadcast on batch dimension, should fail.
TestBinaryTensorOpWeightFallback<ops::Add>(
this, {1, 1, 1}, {2, 1, 1, 1}, error::INVALID_ARGUMENT,
"Unsupported binary op broadcast scheme for op my_binary",
/*input_batch_size=*/2);
// Incompatible dims with per-channel mode.
TestBinaryTensorOpWeightFallback<ops::Add>(this, {1, 1, 1}, {1, 2, 1});
// Incompatible dims.
TestBinaryTensorOpWeightFallback<ops::Add>(this, {1, 2, 1}, {2});
// Test BinaryTensorOpTensor() with broadcasting.
TestBinaryTensorOpTensor<ops::Add, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Sub, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Mul, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Div, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::RealDiv, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Minimum, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Maximum, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Pow, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Add, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Sub, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Mul, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Div, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::RealDiv, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Minimum, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Maximum, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Pow, DT_HALF>(this);
// Test combinations of tensor vs weight inputs (except when both inputs are
// weights).
for (const bool operand_1_is_tensor : {true, false}) {
for (const bool operand_2_is_tensor : {true, false}) {
if (!operand_1_is_tensor && !operand_2_is_tensor) continue;
// FP32 tests
TestBinaryOp<ops::Add, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Sub, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Mul, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Div, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::RealDiv, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Minimum, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Maximum, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Pow, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
// FP16 tests
// TODO(tmorris): Use templates to avoid duplication.
TestBinaryOp<ops::Add, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Sub, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Mul, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Div, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::RealDiv, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Minimum, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Maximum, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Pow, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
}
}
}
TEST_F(OpConverterTest, ConvertQuantize) {