Merge pull request #28805 from trevor-m:develop-pranavm-mm-changes

PiperOrigin-RevId: 249724198
TensorFlower Gardener 2019-05-23 15:15:57 -07:00
commit 07ade963a5
3 changed files with 157 additions and 570 deletions


@@ -385,11 +385,10 @@ string DebugString(const nvinfer1::ITensor& tensor) {
", dims=", DebugString(tensor.getDimensions()), ")");
}
Status Converter::GetTrtBroadcastShape(
const TRT_TensorOrWeights& operand_l, const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims) const {
// ***************************************************************************
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims) {
// TensorRT Elementwise op supports broadcast but requires both tensors to be
// of identical rank
//
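// For illustration (a sketch using only names from this diff; the exact
// expectations live in the converter tests later in this change):
//   nvinfer1::Dims l_new_dims, r_new_dims;
//   TF_RETURN_IF_ERROR(GetTrtBroadcastShape(operand_l, operand_r,
//                                           &l_new_dims, &r_new_dims));
//   // Tensor dims {1, 3, 4} vs {2, 1, 4} are compatible and stay unchanged;
//   // dims {2, 3} vs {7, 5} fail with "Infeasible broadcast scheme".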
@@ -1676,190 +1675,6 @@ Status UnaryCompute(const TRT_ShapedWeights& iweights,
return Status::OK();
}
// If swapped_inputs is false, 'tensor' is the left operand and 'weights' is the
// right operand. If swapped_inputs is true, those two are swapped.
//
// TODO(jie): broadcast is needed but not yet implemented.
// Only channel-wise support is implemented for the time being.
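// For example, with the Sub handling below: swapped_inputs == false computes
// tensor - weights, while swapped_inputs == true computes weights - tensor,
// implemented as (-tensor) + weights via a kNEG unary plus a shift.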
Status BinaryTensorOpWeight(OpConverterParams* params,
nvinfer1::ITensor* tensor,
TRT_ShapedWeights weights, bool swapped_inputs) {
static const std::unordered_set<string> supported_ops = {"Sub", "Add", "Mul",
"Div", "RealDiv"};
const auto& node_def = params->node_def;
if (!supported_ops.count(node_def.op())) {
return errors::Unimplemented(node_def.op(), " is not supported, at ",
node_def.name());
}
// Check scale mode.
auto dims_w = weights.shape_;
const auto dims_t = tensor->getDimensions();
// TODO(jie): addScale checks for input tensor dimension
if (dims_t.nbDims != 3) {
return errors::InvalidArgument("addScale requires tensor with rank 3, at ",
node_def.name());
}
// Default to element-wise
auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
// TODO(jie): maybe use a permutation instead to support more cases.
bool need_to_permute = false;
if (weights.count() == 1) {
scale_mode = nvinfer1::ScaleMode::kUNIFORM;
} else {
VLOG(2) << "weights dims: " << DebugString(dims_w)
<< "; tensor dims: " << DebugString(dims_t);
// Make sure no broadcasting on batch dimension.
if (dims_w.nbDims == dims_t.nbDims + 1) {
if (dims_w.d[0] == 1) {
for (int i = 1; i < dims_w.nbDims; i++) {
dims_w.d[i - 1] = dims_w.d[i];
}
dims_w.nbDims--;
} else {
return errors::InvalidArgument("Binary op cannot operate on batch, at ",
node_def.name());
}
}
if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) {
scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
// Default is element-wise
for (int i = 1; i < dims_w.nbDims; i++) {
if (dims_w.d[i] != dims_t.d[i]) {
// If dimension does not match, switch back to per-channel
scale_mode = nvinfer1::ScaleMode::kCHANNEL;
break;
}
}
// If the mode is per-channel, since channel dimension is assumed to be
// the third to last dimension, we need to make sure all other dimensions
// have size 1.
if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
for (int i = 1; i < dims_w.nbDims; i++) {
if (dims_w.d[i] != 1)
return errors::InvalidArgument(
"Weight dims not compatible for channel-wise broadcast at ",
node_def.name());
}
}
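// For example, tensor dims {2, 1, 2} with weights dims {2, 1, 1} pass this
// check and use kCHANNEL, while tensor dims {1, 1, 1} with weights dims
// {1, 2, 1} fail it (see the per-channel fallback test in this change).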
} else if (dims_w.nbDims == 1 &&
dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) {
// Channel-wise mode with broadcast is required. We compare the last
// dimension of the tensor shape because of TensorFlow's default
// broadcasting rules.
need_to_permute = true;
scale_mode = nvinfer1::ScaleMode::kCHANNEL;
} else {
return errors::InvalidArgument("Weight dims not compatible at ",
node_def.name());
}
}
// TODO(laigd): we should add validation_only support in TransposeTensor() and
// PrepareTensorForShape().
if (params->validation_only) return Status::OK();
// Transpose last dimension.
std::vector<int> permutation(dims_t.nbDims + 1);
if (need_to_permute) {
// We swap the last dimension into the channel dimension for TRT, because
// of TensorFlow's default broadcasting rules.
for (int i = 0; i < static_cast<int>(permutation.size()); i++) {
permutation[i] = i;
}
permutation[1] = dims_t.nbDims;
permutation[dims_t.nbDims] = 1;
TF_RETURN_IF_ERROR(
params->converter->TransposeTensor(tensor, permutation, &tensor));
}
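// For example, with dims_t.nbDims == 3 the permutation built above is
// {0, 3, 2, 1}: the identity with positions 1 and 3 swapped, so the last
// dimension lands in the channel slot expected by kCHANNEL mode.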
// Prepare weights
TRT_ShapedWeights shift_weights(weights.TrtDType());
TRT_ShapedWeights scale_weights(weights.TrtDType());
TRT_ShapedWeights power_weights(weights.TrtDType());
if (node_def.op() == "Sub") {
if (swapped_inputs) {
shift_weights = weights;
nvinfer1::IUnaryLayer* layer = params->converter->network()->addUnary(
*tensor, nvinfer1::UnaryOperation::kNEG);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
// Since quantization ranges are symmetric, the same range as the input
// will work for the negation of the input.
params->converter->MarkQuantizationRangesAsInferrable(
tensor, layer->getOutput(0));
tensor = layer->getOutput(0);
} else {
TRT_ShapedWeights neg_weights =
params->weight_store->GetTempWeights(weights);
LambdaFactory unary_op;
unary_op.op = LambdaFactory::OP_CATEGORY::NEG;
TF_RETURN_IF_ERROR(UnaryCompute(weights, &neg_weights, unary_op));
shift_weights = neg_weights;
}
} else if (node_def.op() == "Div" || node_def.op() == "RealDiv") {
if (swapped_inputs) {
// We need to infer the quantization range for this intermediate tensor.
//
// x -> [Recip] -> 1/x -> [Scale] -> s/x
//                  ^
//            need range for this
//
// We have the quantization scales for x and s/x - can we divide the scale
// for s/x by s? Only if it is a scalar.
//
// Because of this issue, fall back to BinaryTensorOpTensor if we are
// doing INT8 with no calibration. There is most likely no performance
// penalty by falling back here.
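// For example, if x has the symmetric range [-8, 8], 1/x is unbounded as
// x approaches zero, so no usable range for the kRECIP output can be
// derived from the input range alone.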
if (params->converter->precision_mode() == TrtPrecisionMode::INT8 &&
!params->converter->use_calibration()) {
return errors::Unimplemented(
"Intermediate quantization range cannot be determined without"
" calibration. Falling back to BinaryTensorOpTensor for ",
node_def.op(), ", at ", node_def.name());
}
scale_weights = weights;
nvinfer1::IUnaryLayer* layer = params->converter->network()->addUnary(
*tensor, nvinfer1::UnaryOperation::kRECIP);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
tensor = layer->getOutput(0);
} else {
TRT_ShapedWeights recip_weights =
params->weight_store->GetTempWeights(weights);
LambdaFactory unary_op;
unary_op.op = LambdaFactory::OP_CATEGORY::RECIP;
TF_RETURN_IF_ERROR(UnaryCompute(weights, &recip_weights, unary_op));
scale_weights = recip_weights;
}
} else if (node_def.op() == "Mul") {
scale_weights = weights;
} else if (node_def.op() == "Add") {
shift_weights = weights;
} else {
// This should not happen.
return errors::Unimplemented("Binary op not supported at ", node_def.op());
}
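// At this point the op has been lowered onto addScale's inputs: Add/Sub
// populate shift_weights (Sub negates one side), Mul/Div/RealDiv populate
// scale_weights (division via reciprocal), and power_weights stays empty so
// no exponent is applied.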
nvinfer1::IScaleLayer* layer = params->converter->network()->addScale(
*tensor, scale_mode, shift_weights.GetTrtWeights(),
scale_weights.GetTrtWeights(), power_weights.GetTrtWeights());
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
// Transpose the dimensions back.
if (need_to_permute) {
TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
output_tensor, permutation, &output_tensor));
}
// Pass the output
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
return Status::OK();
}
Status ConvertConv2DHelper(OpConverterParams* params, int group,
bool is_conv2d_backprop_input) {
const auto& inputs = params->inputs;
@@ -2047,74 +1862,6 @@ Status ConvertConv2DHelper(OpConverterParams* params, int group,
return Status::OK();
}
Status BinaryTensorOpTensor(OpConverterParams* params,
const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r) {
const auto& node_def = params->node_def;
static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{
{"Add", nvinfer1::ElementWiseOperation::kSUM},
{"Mul", nvinfer1::ElementWiseOperation::kPROD},
{"Sub", nvinfer1::ElementWiseOperation::kSUB},
{"Div", nvinfer1::ElementWiseOperation::kDIV},
{"RealDiv", nvinfer1::ElementWiseOperation::kDIV},
{"Minimum", nvinfer1::ElementWiseOperation::kMIN},
{"Maximum", nvinfer1::ElementWiseOperation::kMAX},
{"Pow", nvinfer1::ElementWiseOperation::kPOW},
};
auto op_pair = ops.find(node_def.op());
if (op_pair == ops.end()) {
return errors::Unimplemented("Binary op ", node_def.op(),
" not supported at: ", node_def.name());
}
nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
Status status = params->converter->GetTrtBroadcastShape(
operand_l, operand_r, &broadcasted_dims_l, &broadcasted_dims_r);
if (!status.ok()) {
return errors::InvalidArgument(
"Unsupported binary op broadcast scheme for op ", node_def.name(), ": ",
status.error_message());
}
TFAttrs attrs(node_def);
nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T");
if (dtype == nvinfer1::DataType::kINT32) {
return errors::Unimplemented("Binary op ", node_def.op(),
" does not support INT32, at ",
node_def.name());
}
if (params->validation_only) return Status::OK();
nvinfer1::ITensor* tensor_l = nullptr;
nvinfer1::ITensor* tensor_r = nullptr;
status = params->converter->PrepareTensorForShape(
operand_l, broadcasted_dims_l, /*validation_only=*/false, &tensor_l);
if (status.ok()) {
status = params->converter->PrepareTensorForShape(
operand_r, broadcasted_dims_r, /*validation_only=*/false, &tensor_r);
}
if (!status.ok()) {
return errors::Internal("Failed to convert binary op ", node_def.name(),
": ", status.error_message());
}
// Check type consistency.
TFTRT_CHECK_EQ_TYPE(tensor_l->getType(), dtype)
<< DebugString(tensor_l->getType()) << " vs " << DebugString(dtype);
TFTRT_CHECK_EQ_TYPE(tensor_r->getType(), dtype)
<< DebugString(tensor_r->getType()) << " vs " << DebugString(dtype);
// Add ElementWise layer.
nvinfer1::IElementWiseLayer* layer =
params->converter->network()->addElementWise(*tensor_l, *tensor_r,
op_pair->second);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
nvinfer1::ITensor* output_tensor = layer->getOutput(0);
// Pass the output
params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
return Status::OK();
}
Status ConvertPlugin(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
@@ -3335,9 +3082,6 @@ Status ConvertIdentity(OpConverterParams* params) {
Status ConvertBinary(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
// TODO(tmorris): Enable once false is updated to mean either tensor or weight
// TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"x", false}, {"y",
// false}}));
if (inputs.size() != 2) {
return errors::InvalidArgument(node_def.op(), " got ", inputs.size(),
" inputs but expected 2, at ",
@@ -3353,33 +3097,45 @@ Status ConvertBinary(OpConverterParams* params) {
"both input as constant at: ",
node_def.name());
}
const TRT_TensorOrWeights& operand_l = inputs.at(0);
const TRT_TensorOrWeights& operand_r = inputs.at(1);
// TODO(tmorris): TRT plans to deprecate IScaleLayer and will replace it with
// IElementwiseLayer. At that point, we can remove BinaryTensorOpWeight. For
// now, the performance will be slightly better with IScaleLayer because it
// can be fused in more situations. However, most of the benefits of
// IScaleLayer are when the layer performs both a shift and a scale, which we
// don't do except for convolutions.
//
// Try to convert into a Scale layer first (for better performance).
// Since the scale layer supports only a restricted broadcast policy and set
// of op types, we allow failure and fall back to the Elementwise op
// (BinaryTensorOpTensor).
Status status = Status::OK();
if (inputs.at(0).is_tensor() && inputs.at(1).is_weights()) {
status = BinaryTensorOpWeight(params, inputs.at(0).tensor(),
inputs.at(1).weights(), false);
} else if (inputs.at(0).is_weights() && inputs.at(1).is_tensor()) {
status = BinaryTensorOpWeight(params, inputs.at(1).tensor(),
inputs.at(0).weights(), true);
static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{
{"Add", nvinfer1::ElementWiseOperation::kSUM},
{"Mul", nvinfer1::ElementWiseOperation::kPROD},
{"Sub", nvinfer1::ElementWiseOperation::kSUB},
{"Div", nvinfer1::ElementWiseOperation::kDIV},
{"RealDiv", nvinfer1::ElementWiseOperation::kDIV},
{"Minimum", nvinfer1::ElementWiseOperation::kMIN},
{"Maximum", nvinfer1::ElementWiseOperation::kMAX},
{"Pow", nvinfer1::ElementWiseOperation::kPOW},
};
auto op_pair = ops.find(node_def.op());
if (op_pair == ops.end()) {
return errors::Unimplemented("Binary op ", node_def.op(),
" not supported at: ", node_def.name());
}
// If both inputs are tensors, or one of them is weights but the conversion
// above failed, try the conversion using BinaryTensorOpTensor.
if ((inputs.at(0).is_tensor() && inputs.at(1).is_tensor()) || !status.ok()) {
if (!status.ok()) VLOG(2) << status;
status = BinaryTensorOpTensor(params, inputs.at(0), inputs.at(1));
}
return status;
nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
operand_l, operand_r, &broadcasted_dims_l, &broadcasted_dims_r));
nvinfer1::ITensor* tensor_l = nullptr;
nvinfer1::ITensor* tensor_r = nullptr;
// This will also convert constants to tensors, and set quantization ranges.
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
operand_l, broadcasted_dims_l, params->validation_only, &tensor_l));
TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
operand_r, broadcasted_dims_r, params->validation_only, &tensor_r));
if (params->validation_only) return Status::OK();
// Add ElementWise layer.
nvinfer1::IElementWiseLayer* layer =
params->converter->network()->addElementWise(*tensor_l, *tensor_r,
op_pair->second);
TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0)));
return Status::OK();
}
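// Note: the rewritten ConvertBinary above now follows a single path for every
// tensor/weights combination: look up the TRT elementwise op, compute the
// broadcast shapes with GetTrtBroadcastShape, materialize both operands with
// PrepareTensorForShape (which also turns weights into constant layers and
// sets quantization ranges), and add one IElementWiseLayer.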
Status ConvertRsqrt(OpConverterParams* params) {
@@ -4532,7 +4288,7 @@ Status ConvertSquaredDifference(OpConverterParams* params) {
const auto& node_def = params->node_def;
// Broadcast inputs.
nvinfer1::Dims broadcasted_dims_l, broadcasted_dims_r;
TF_RETURN_IF_ERROR(params->converter->GetTrtBroadcastShape(
TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
inputs.at(0), inputs.at(1), &broadcasted_dims_l, &broadcasted_dims_r));
nvinfer1::ITensor* tensor_l = nullptr;
nvinfer1::ITensor* tensor_r = nullptr;


@@ -512,13 +512,6 @@ class Converter {
const bool validation_only,
nvinfer1::ITensor** tensor);
// Return OK if the broadcast scheme is supported and compute the shapes after
// broadcasting.
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims) const;
// Creates an IConstantLayer using 'weights' whose dimensions are specified by
// 'dims', and returns the output ITensor.
nvinfer1::ITensor* CreateConstantLayer(const TRT_ShapedWeights& weights,
@@ -592,6 +585,13 @@ class Converter {
friend class OpConverterTest;
};
// Return OK if the broadcast scheme is supported and compute the shapes after
// broadcasting.
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims);
// Map of all supported UnaryOperations
const std::unordered_map<string, nvinfer1::UnaryOperation>* UnaryOperationMap();
// Map of all supported ActivationTypes


@@ -988,19 +988,17 @@ TEST_F(ConverterTest, GetTrtBroadcastShape) {
operand_2_shape, operand_2_is_tensor, operand_2_batch_size);
// operand_1 broadcast operand_2
ExpectStatus(
this->converter_->GetTrtBroadcastShape(
operand_1, operand_2, &operand_1_new_dims, &operand_2_new_dims),
expected_code, expected_error_msg_substr);
ExpectStatus(GetTrtBroadcastShape(operand_1, operand_2, &operand_1_new_dims,
&operand_2_new_dims),
expected_code, expected_error_msg_substr);
if (expected_code == error::OK) {
ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
}
// operand_2 broadcast operand_1
ExpectStatus(
this->converter_->GetTrtBroadcastShape(
operand_2, operand_1, &operand_2_new_dims, &operand_1_new_dims),
expected_code, expected_error_msg_substr);
ExpectStatus(GetTrtBroadcastShape(operand_2, operand_1, &operand_2_new_dims,
&operand_1_new_dims),
expected_code, expected_error_msg_substr);
if (expected_code == error::OK) {
ExpectTrtDimsEqualsArray(expected_operand_1_shape, operand_1_new_dims);
ExpectTrtDimsEqualsArray(expected_operand_2_shape, operand_2_new_dims);
@@ -1033,18 +1031,29 @@ TEST_F(ConverterTest, GetTrtBroadcastShape) {
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 4 vs broadcast #dims 5)");
symmetric_test({3}, {1, 1, 3}, kIsTensor, kIsNotTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 2 vs broadcast #dims 3)",
/*operand_1_batch_size=*/2);
// Both inputs are tensors.
symmetric_test({1, 1, 1}, {1, 1}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 3 vs broadcast #dims 4)");
symmetric_test({1, 3}, {3}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 2 vs broadcast #dims 3)");
symmetric_test({1, 3, 4}, {2, 1, 4}, kIsTensor, kIsTensor, {1, 3, 4},
{2, 1, 4});
symmetric_test({1, 1, 1}, {1, 1, 1, 1}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT,
"Broadcasting beyond batch dimension is not supported "
"(tensor #dims 4 vs broadcast #dims 5)");
symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
}
TEST_F(ConverterTest, CreateConstantLayer) {
@@ -2007,250 +2016,82 @@ void CheckAddedLayers(OpConverterTest* test, bool expect_scale_layer) {
}
template <typename OpType, DataType dtype>
void TestBinaryTensorOpWeightNoBroadcast(OpConverterTest* test) {
typedef typename EnumToDataType<dtype>::Type CType;
for (auto swap_inputs : {false, true}) {
test->Reset();
NodeDef node_def;
if (swap_inputs) {
node_def = GetBinaryOpNodeDef<OpType>("weights", "input", dtype);
} else {
node_def = GetBinaryOpNodeDef<OpType>("input", "weights", dtype);
}
const std::vector<CType> operand1{CType(3), CType(7.5)};
const std::vector<CType> operand2{CType(2), CType(3)};
// Applying an IScaleLayer requires the dims to be at least rank 3.
test->AddTestTensor("input", /*dims=*/{1, 1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>("weights", /*dims=*/{1, 1, 2},
/*values=*/swap_inputs ? operand1 : operand2);
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpWeight, not BinaryTensorOpTensor.
CheckAddedLayers(test, /*expect_scale_layer=*/true);
// Check the dims of the output ITensor.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({1, 1, 2}, output.tensor()->getDimensions());
const DataVec input_data{
{"input", test::AsTensor<CType>(swap_inputs ? operand2 : operand1)}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(2)}};
test->BuildAndRun(
input_data, &output_data,
dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32);
if (node_def.op() == "Add") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(5), CType(10.5)));
} else if (node_def.op() == "Sub") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1), CType(4.5)));
} else if (node_def.op() == "Mul") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(6), CType(22.5)));
} else if (node_def.op() == "Div") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(2.5)));
} else if (node_def.op() == "RealDiv") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(2.5)));
} else {
ASSERT_TRUE(false);
}
}
}
template <DataType dtype>
void TestBinaryTensorOpWeightWithChannelWiseBroadcast(OpConverterTest* test) {
typedef typename EnumToDataType<dtype>::Type CType;
const NodeDef node_def =
GetBinaryOpNodeDef<ops::Add>("input", "weights", dtype);
const std::vector<CType> input{CType(1), CType(2), CType(3), CType(4)};
const std::vector<CType> weights{CType(10), CType(20)};
// There are two types of valid dim pairs which require channel-wise
// broadcasting:
// - input dims (X Y Z) vs weights dims (X 1 1)
// - input dims (X Y Z) vs weights dims (Z)
// Here X=Z=2 and Y=1.
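// With input values {1, 2, 3, 4} this yields (matching the assertions below):
//   weights dims {2}:       broadcast along last axis -> {11, 22, 13, 24}
//   weights dims {2, 1, 1}: broadcast per channel     -> {11, 12, 23, 24}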
for (auto weights_dims : std::vector<std::vector<int>>{{2, 1, 1}, {2}}) {
test->Reset();
test->AddTestTensor("input", /*dims=*/{2, 1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>("weights", weights_dims, weights);
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpWeight, not BinaryTensorOpTensor.
CheckAddedLayers(test, /*expect_scale_layer=*/true);
// Check the dims of the output ITensor.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({2, 1, 2}, output.tensor()->getDimensions());
const DataVec input_data{{"input", test::AsTensor<CType>(input)}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(4)}};
test->BuildAndRun(input_data, &output_data);
if (weights_dims.size() == 1) {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(11), CType(22), CType(13), CType(24)));
} else {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(11), CType(12), CType(23), CType(24)));
}
}
}
template <DataType dtype>
void TestBinaryTensorOpWeightWithUniformlyBroadcast(OpConverterTest* test) {
typedef typename EnumToDataType<dtype>::Type CType;
const NodeDef node_def =
GetBinaryOpNodeDef<ops::Add>("input", "weights", dtype);
const std::vector<CType> input{CType(1), CType(2), CType(3), CType(4)};
const std::vector<CType> weights{CType(10)};
test->Reset();
test->AddTestTensor("input", /*dims=*/{2, 1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>("weights", {1, 1, 1, 1}, weights);
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpWeight, not BinaryTensorOpTensor.
CheckAddedLayers(test, /*expect_scale_layer=*/true);
// Check the dims of the output ITensor.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({2, 1, 2}, output.tensor()->getDimensions());
const DataVec input_data{{"input", test::AsTensor<CType>(input)}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(4)}};
test->BuildAndRun(input_data, &output_data);
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(11), CType(12), CType(13), CType(14)));
}
template <typename OpType>
void TestBinaryTensorOpWeightFallback(OpConverterTest* test,
const std::vector<int32>& input_dims,
const std::vector<int>& weights_dims,
error::Code code = error::OK,
const char* error_msg_substr = nullptr,
const int input_batch_size = 1) {
const DataType dtype = DT_FLOAT;
typedef typename EnumToDataType<dtype>::Type CType;
const size_t num_inputs = TrtTensorDimsNumElements(GetTestDims(input_dims));
const size_t num_weights =
TrtWeightDimsNumElements(GetTestDims(weights_dims));
test->Reset();
const NodeDef node_def =
GetBinaryOpNodeDef<OpType>("input", "weights", dtype);
test->AddTestTensor("input", /*dims=*/input_dims, input_batch_size,
TfDataTypeToTrt(dtype));
test->AddTestWeights<CType>(
"weights", /*dims=*/weights_dims,
/*values=*/std::vector<CType>(num_weights, CType(1)));
test->RunValidationAndConversion(node_def, code, error_msg_substr);
if (code != error::OK) return;
// Make sure it does use BinaryTensorOpTensor, not BinaryTensorOpWeight.
CheckAddedLayers(test, /*expect_scale_layer=*/false);
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
// Check the dims of the output ITensor.
std::vector<int> expected_output_dims = input_dims;
for (int i = expected_output_dims.size() - 1, j = weights_dims.size() - 1;
i >= 0 && j >= 0; --i, --j) {
if (expected_output_dims[i] == 1) {
expected_output_dims[i] = weights_dims[j];
}
}
ExpectTrtDimsEqualsArray(expected_output_dims,
output.tensor()->getDimensions());
// Check the result of running the engine.
const int expected_num_outputs =
TrtTensorDimsNumElements(GetTestDims(expected_output_dims));
const DataVec input_data{
{"input", ConstructTensor<CType>(num_inputs, CType(2))}};
DataVec output_data{
{"my_binary", ConstructTensor<CType>(expected_num_outputs)}};
test->BuildAndRun(input_data, &output_data);
if (node_def.op() == "Add") {
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(std::vector<CType>(expected_num_outputs, CType(3))));
} else if (node_def.op() == "Minimum") {
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(std::vector<CType>(expected_num_outputs, CType(1))));
} else {
ASSERT_TRUE(false);
}
}
template <typename OpType, DataType dtype>
void TestBinaryTensorOpTensor(OpConverterTest* test) {
void TestBinaryOp(OpConverterTest* test, bool operand_1_is_tensor,
bool operand_2_is_tensor) {
typedef typename EnumToDataType<dtype>::Type CType;
test->Reset();
const NodeDef node_def =
GetBinaryOpNodeDef<OpType>("input1", "input2", dtype);
test->AddTestTensor("input1", /*dims=*/{1, 2}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
test->AddTestTensor("input2", /*dims=*/{2, 1}, /*batch_size=*/1,
TfDataTypeToTrt(dtype));
if (operand_1_is_tensor) {
test->AddTestTensor("input1", /*dims=*/{1, 2}, /*batch_size=*/2,
TfDataTypeToTrt(dtype));
} else {
test->AddTestWeights("input1", /*dims=*/{1, 2},
/*values=*/std::vector<CType>{CType(3), CType(6)});
}
if (operand_2_is_tensor) {
test->AddTestTensor("input2", /*dims=*/{2, 1}, /*batch_size=*/2,
TfDataTypeToTrt(dtype));
} else {
test->AddTestWeights("input2", /*dims=*/{2, 1},
/*values=*/std::vector<CType>{CType(2), CType(3)});
}
test->RunValidationAndConversion(node_def);
// Make sure it does use BinaryTensorOpTensor, not BinaryTensorOpWeight.
CheckAddedLayers(test, /*expect_scale_layer=*/false);
DataVec input_data;
if (operand_1_is_tensor) {
input_data.push_back(
{"input1",
test::AsTensor<CType>({CType(3), CType(6), CType(3), CType(6)})});
}
if (operand_2_is_tensor) {
input_data.push_back(
{"input2",
test::AsTensor<CType>({CType(2), CType(3), CType(2), CType(3)})});
}
DataVec output_data{{"my_binary", ConstructTensor<CType>(8)}};
// Check output dims.
TRT_TensorOrWeights output;
TF_EXPECT_OK(test->GetTensorOrWeights("my_binary", &output));
ASSERT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions());
const DataVec input_data{
{"input1", test::AsTensor<CType>({CType(3), CType(6)})},
{"input2", test::AsTensor<CType>({CType(2), CType(3)})}};
DataVec output_data{{"my_binary", ConstructTensor<CType>(4)}};
// After broadcasting, the first input becomes {3, 6, 3, 6} and the second
// input becomes {2, 2, 3, 3}.
test->BuildAndRun(
input_data, &output_data,
dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32);
dtype == DT_HALF ? TrtPrecisionMode::FP16 : TrtPrecisionMode::FP32,
/*batch_size=*/2);
if (node_def.op() == "Add") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(5), CType(8), CType(6), CType(9)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({5, 8, 6, 9, 5, 8, 6, 9})));
} else if (node_def.op() == "Sub") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1), CType(4), CType(0), CType(3)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({1, 4, 0, 3, 1, 4, 0, 3})));
} else if (node_def.op() == "Mul") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(6), CType(12), CType(9), CType(18)));
ElementsAreArray(
CastTestVector<int, CType>({6, 12, 9, 18, 6, 12, 9, 18})));
} else if (node_def.op() == "Div") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(3), CType(1), CType(2)));
ElementsAreArray(CastTestVector<float, CType>(
{1.5, 3, 1, 2, 1.5, 3, 1, 2})));
} else if (node_def.op() == "RealDiv") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(1.5), CType(3), CType(1), CType(2)));
ElementsAreArray(CastTestVector<float, CType>(
{1.5, 3, 1, 2, 1.5, 3, 1, 2})));
} else if (node_def.op() == "Minimum") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(2), CType(2), CType(3), CType(3)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({2, 2, 3, 3, 2, 2, 3, 3})));
} else if (node_def.op() == "Maximum") {
EXPECT_THAT(GetSpanForData<CType>(output_data[0]),
ElementsAre(CType(3), CType(6), CType(3), CType(6)));
EXPECT_THAT(
GetSpanForData<CType>(output_data[0]),
ElementsAreArray(CastTestVector<int, CType>({3, 6, 3, 6, 3, 6, 3, 6})));
} else if (node_def.op() == "Pow") {
ExpectArrayNear(
std::vector<CType>{CType(9), CType(36), CType(27), CType(216)},
CastTestVector<int, CType>({9, 36, 27, 216, 9, 36, 27, 216}),
GetSpanForData<CType>(output_data[0]));
} else {
ASSERT_TRUE(false);
@@ -2284,58 +2125,48 @@ TEST_F(OpConverterTest, ConvertBinary) {
"both input as constant at: my_add");
}
// Test BinaryTensorOpWeight() without broadcasting.
TestBinaryTensorOpWeightNoBroadcast<ops::Add, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Sub, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Mul, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Div, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::RealDiv, DT_FLOAT>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Add, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Sub, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Mul, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::Div, DT_HALF>(this);
TestBinaryTensorOpWeightNoBroadcast<ops::RealDiv, DT_HALF>(this);
// Test BinaryTensorOpWeight() with channel-wise broadcasting.
TestBinaryTensorOpWeightWithChannelWiseBroadcast<DT_FLOAT>(this);
// Test BinaryTensorOpWeight() with uniform broadcasting.
TestBinaryTensorOpWeightWithUniformlyBroadcast<DT_FLOAT>(this);
// Test BinaryTensorOpWeight() falling back to BinaryTensorOpTensor().
// Unsupported op.
TestBinaryTensorOpWeightFallback<ops::Minimum>(this, {1, 1, 1}, {1});
// Rank of input tensor < 3.
TestBinaryTensorOpWeightFallback<ops::Add>(this, {1, 1}, {1});
// Broadcast on batch dimension, should fail.
TestBinaryTensorOpWeightFallback<ops::Add>(
this, {1, 1, 1}, {2, 1, 1, 1}, error::INVALID_ARGUMENT,
"Unsupported binary op broadcast scheme for op my_binary",
/*input_batch_size=*/2);
// Incompatible dims with per-channel mode.
TestBinaryTensorOpWeightFallback<ops::Add>(this, {1, 1, 1}, {1, 2, 1});
// Incompatible dims.
TestBinaryTensorOpWeightFallback<ops::Add>(this, {1, 2, 1}, {2});
// Test BinaryTensorOpTensor() with broadcasting.
TestBinaryTensorOpTensor<ops::Add, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Sub, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Mul, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Div, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::RealDiv, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Minimum, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Maximum, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Pow, DT_FLOAT>(this);
TestBinaryTensorOpTensor<ops::Add, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Sub, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Mul, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Div, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::RealDiv, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Minimum, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Maximum, DT_HALF>(this);
TestBinaryTensorOpTensor<ops::Pow, DT_HALF>(this);
// Test combinations of tensor vs weight inputs (except when both inputs are
// weights).
for (const bool operand_1_is_tensor : {true, false}) {
for (const bool operand_2_is_tensor : {true, false}) {
if (!operand_1_is_tensor && !operand_2_is_tensor) continue;
// FP32 tests
TestBinaryOp<ops::Add, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Sub, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Mul, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Div, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::RealDiv, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Minimum, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Maximum, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Pow, DT_FLOAT>(this, operand_1_is_tensor,
operand_2_is_tensor);
// FP16 tests
// TODO(tmorris): Use templates to avoid duplication.
TestBinaryOp<ops::Add, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Sub, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Mul, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Div, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::RealDiv, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Minimum, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Maximum, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
TestBinaryOp<ops::Pow, DT_HALF>(this, operand_1_is_tensor,
operand_2_is_tensor);
}
}
}
TEST_F(OpConverterTest, ConvertQuantize) {