When in int8 no-calibration mode, set layers with no range info to fp16, so
TensorRT doesn't fail to compile the whole segment.

PiperOrigin-RevId: 255451092
commit 32002bf4ed (parent 160e51b6e8)
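Reviewer note, not part of the diff: in TensorRT INT8 mode, every tensor that takes part in int8 math needs a dynamic range, either from calibration or from explicit setDynamicRange() calls; a missing range previously made engine building fail for the whole segment. The change below instead falls back to fp16 for the affected layers. A minimal standalone sketch of that fallback, assuming a fully built INetworkDefinition; the function name and the `ranged` set are illustrative stand-ins for TF-TRT's quantization_ranges_ bookkeeping, not code from this commit:

#include <set>
#include "NvInfer.h"

// Force layers whose outputs lack a quantization range out of int8.
void DemoteRangelessLayersToFp16(nvinfer1::INetworkDefinition* network,
                                 const std::set<nvinfer1::ITensor*>& ranged) {
  for (int i = 0; i < network->getNbLayers(); i++) {
    nvinfer1::ILayer* layer = network->getLayer(i);
    bool missing_range = false;
    for (int j = 0; j < layer->getNbOutputs(); j++) {
      if (ranged.count(layer->getOutput(j)) == 0) missing_range = true;
    }
    // Shuffle layers don't support setting precision (see the diff below).
    if (missing_range && layer->getType() != nvinfer1::LayerType::kSHUFFLE) {
      // Run this layer in fp16 so the builder doesn't need int8 ranges for it.
      layer->setPrecision(nvinfer1::DataType::kHALF);
    }
  }
}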
@@ -1425,6 +1425,32 @@ void Converter::ProvideQuantizationRange(nvinfer1::ITensor* tensor,
   quantization_ranges_[tensor] = symmetric_range;
 }
 
+namespace {
+
+bool IsConvolution(const nvinfer1::ILayer* layer) {
+  return layer->getType() == nvinfer1::LayerType::kCONVOLUTION;
+}
+
+bool IsScale(const nvinfer1::ILayer* layer) {
+  return layer->getType() == nvinfer1::LayerType::kSCALE;
+}
+
+bool IsClipOrRelu(const nvinfer1::ILayer* layer) {
+  if (layer->getType() != nvinfer1::LayerType::kACTIVATION) {
+    return false;
+  }
+  auto activation_type = static_cast<const nvinfer1::IActivationLayer*>(layer)
+                             ->getActivationType();
+#if IS_TRT_VERSION_GE(5, 1, 2, 0)
+  return activation_type == nvinfer1::ActivationType::kRELU ||
+         activation_type == nvinfer1::ActivationType::kCLIP;
+#else
+  return activation_type == nvinfer1::ActivationType::kRELU;
+#endif
+}
+
+}  // namespace
+
 void Converter::MaybeApplyQuantizationRanges() {
   if (precision_mode() != TrtPrecisionMode::INT8) return;
 
@@ -1442,34 +1468,120 @@ void Converter::MaybeApplyQuantizationRanges() {
   }
 #endif
 
-  // Warn user about tensors that are missing ranges. If TRT fuses some layers
-  // then these tensors may not actually be required, which is why this is
-  // just a warning. If we are still missing ranges even after fusion,
-  // Builder::buildCudaEngine() will return nullptr and we will catch the
-  // error at that point.
-  if (!use_calibration()) {
-    // Get all tensors from network
-    std::set<nvinfer1::ITensor*> all_tensors;
-    for (int i = 0; i < this->network()->getNbLayers(); i++) {
-      nvinfer1::ILayer* layer = this->network()->getLayer(i);
-      for (int j = 0; j < layer->getNbInputs(); j++) {
-        all_tensors.insert(layer->getInput(j));
-      }
-      for (int j = 0; j < layer->getNbOutputs(); j++) {
-        all_tensors.insert(layer->getOutput(j));
-      }
-    }
-    // Find tensors with no ranges
-    for (auto tensor : all_tensors) {
-      if (!quantization_ranges_.count(tensor)) {
-        // Note: there may be some warnings for "(Unnamed ITensor* N)". These
-        // are tensors which are created internally by TF-TRT. The ranges for
-        // these unnamed ITensors are always inferred from user provided ranges,
-        // thus there will also be a warning for the range(s) the user missed.
-        LOG(WARNING) << "Quantization range was not found for "
-                     << tensor->getName() << ". "
-                     << "This is okay if TensorRT does not need the range "
-                     << "(e.g. due to node fusion).";
-      }
-    }
-  }
+  if (use_calibration()) return;
+  // Attempt to find tensors that are missing ranges, and set the corresponding
+  // layer's precision to FP16 to avoid Builder::buildCudaEngine() failing.
+  // TensorRT doesn't need ranges for intermediate tensors when layers are fused
+  // so find fused layers first.
+  // Get all tensors from network and deduce fused ops.
+  std::map<nvinfer1::ILayer*, std::vector<nvinfer1::ILayer*>> layer_consumers;
+  std::map<nvinfer1::ITensor*, nvinfer1::ILayer*> tensor_layer;
+  std::set<nvinfer1::ITensor*> all_tensors;
+  for (int i = 0; i < this->network()->getNbLayers(); i++) {
+    nvinfer1::ILayer* layer = this->network()->getLayer(i);
+    layer_consumers[layer] = {};
+    for (int j = 0; j < layer->getNbInputs(); j++) {
+      all_tensors.insert(layer->getInput(j));
+    }
+    for (int j = 0; j < layer->getNbOutputs(); j++) {
+      tensor_layer[layer->getOutput(j)] = layer;
+      all_tensors.insert(layer->getOutput(j));
+    }
+  }
+  for (int i = 0; i < this->network()->getNbLayers(); i++) {
+    nvinfer1::ILayer* layer = this->network()->getLayer(i);
+    layer_consumers[layer] = {};
+    for (int j = 0; j < layer->getNbInputs(); j++) {
+      nvinfer1::ITensor* input_tensor = layer->getInput(j);
+      auto input_layer = tensor_layer.find(input_tensor);
+      if (input_layer != tensor_layer.end()) {
+        auto consumed_layer = layer_consumers.find(input_layer->second);
+        if (consumed_layer != layer_consumers.end()) {
+          consumed_layer->second.push_back(layer);
+        }
+      }
+      all_tensors.insert(input_tensor);
+    }
+  }
+  // Identify fused tensors.
+  // Conv+BiasAdd+Activation(Clip or Relu), Conv+BiasAdd,
+  // Conv+Activation(Clip or Relu) are fused.
+  std::set<nvinfer1::ITensor*> fused_tensors;
+  typedef std::function<bool(const nvinfer1::ILayer*)> matcher;
+  const std::vector<std::pair<string, std::vector<matcher>>> fused_patterns = {
+      {"Fused Conv+Bias+Activation",
+       {
+           IsConvolution,
+           IsScale,
+           IsClipOrRelu,
+       }},
+      {"Fused Conv+Bias",
+       {
+           IsConvolution,
+           IsScale,
+       }},
+      {"Fused Conv+Activation",
+       {
+           IsConvolution,
+           IsClipOrRelu,
+       }},
+  };
+  for (int i = 0; i < this->network()->getNbLayers(); i++) {
+    for (const auto& pattern : fused_patterns) {
+      size_t last_matcher = pattern.second.size() - 1;
+      nvinfer1::ILayer* layer = this->network()->getLayer(i);
+      // We should skip this layer if its outputs are already marked as fused,
+      // but all the current patterns start with a convolution and are ordered
+      // in decreasing pattern length, so that is not necessary (yet).
+      std::vector<nvinfer1::ILayer*> fused_candidates;
+      for (size_t index = 0; index <= last_matcher; ++index) {
+        if ((!pattern.second[index](layer)) ||
+            (index < last_matcher && layer_consumers[layer].size() != 1)) {
+          fused_candidates.clear();
+          break;
+        }
+        if (index < last_matcher) {
+          fused_candidates.push_back(layer);
+          layer = layer_consumers[layer].front();
+        }
+      }
+      if (!fused_candidates.empty()) {
+        VLOG(1) << pattern.first;
+        for (const auto& fused_layer : fused_candidates) {
+          for (int i = 0; i < fused_layer->getNbOutputs(); i++) {
+            VLOG(1) << "  Fused output tensor:"
+                    << fused_layer->getOutput(i)->getName();
+            fused_tensors.insert(fused_layer->getOutput(i));
+          }
+        }
+        break;  // Don't try other patterns on this layer.
+      }
+    }
+  }
+  // Find tensors with no ranges that are not fused and force their layers to
+  // not be quantized.
+  for (auto tensor : all_tensors) {
+    if (!quantization_ranges_.count(tensor) &&
+        fused_tensors.find(tensor) == fused_tensors.end()) {
+      // Note: there may be some warnings for "(Unnamed ITensor* N)". These
+      // are tensors which are created internally by TF-TRT. The ranges for
+      // these unnamed ITensors are always inferred from user provided ranges,
+      // thus there will also be a warning for the range(s) the user missed.
+      LOG(WARNING) << "Quantization range was not found for "
+                   << tensor->getName() << ". "
+                   << "Setting invalid quantization range.";
+      // Set the range to something unusable so the engine will fail if it
+      // tries to actually use the tensor's range.
+      tensor->setDynamicRange(0, 0);
+      auto layer = tensor_layer.find(tensor);
+      // If the tensor is the output of a layer, set the layer's precision
+      // to fp16 so that it isn't quantized.
+      // Shuffle doesn't support setting precision.
+      if (layer != tensor_layer.end() &&
+          layer->second->getType() != nvinfer1::LayerType::kSHUFFLE) {
+        VLOG(1) << "And setting layer " << layer->second->getName()
                << " precision to fp16.";
+        layer->second->setPrecision(nvinfer1::DataType::kHALF);
+      }
+    }
+  }
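Reviewer note, illustration only: the fusion walk above requires each non-final layer in a candidate chain to have exactly one consumer, and it deliberately leaves the final layer out of fused_candidates, because the fused op's own output tensor still needs a quantization range. A self-contained sketch of the same loop over simplified stand-in types (not the TensorRT API), assuming a hypothetical Conv -> Scale -> Relu chain:

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Simplified stand-in for nvinfer1::ILayer; illustration only.
struct Layer {
  std::string kind;
};

using Matcher = std::function<bool(const Layer*)>;

int main() {
  // Hypothetical chain conv -> scale -> relu, each stage with one consumer.
  Layer conv{"conv"}, scale{"scale"}, relu{"relu"};
  std::map<Layer*, std::vector<Layer*>> layer_consumers = {
      {&conv, {&scale}}, {&scale, {&relu}}, {&relu, {}}};

  // Same shape as the "Fused Conv+Bias+Activation" pattern above.
  std::vector<Matcher> pattern = {
      [](const Layer* l) { return l->kind == "conv"; },
      [](const Layer* l) { return l->kind == "scale"; },
      [](const Layer* l) { return l->kind == "relu"; }};

  // The walk from the diff: match stage by stage, follow the single consumer,
  // and collect every layer except the last one.
  size_t last_matcher = pattern.size() - 1;
  Layer* layer = &conv;
  std::vector<Layer*> fused_candidates;
  for (size_t index = 0; index <= last_matcher; ++index) {
    if (!pattern[index](layer) ||
        (index < last_matcher && layer_consumers[layer].size() != 1)) {
      fused_candidates.clear();
      break;
    }
    if (index < last_matcher) {
      fused_candidates.push_back(layer);
      layer = layer_consumers[layer].front();
    }
  }
  for (Layer* l : fused_candidates) {
    std::cout << "range not required for output of: " << l->kind << "\n";
  }
  // Prints conv and scale; relu is intentionally not collected.
  return 0;
}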
@@ -1570,7 +1682,7 @@ Status CheckInputsWeights(
                                    " must be a constant, at ", node_def.name());
     }
     // TODO(tmorris): Remove this check and provide a method to automatically
-    // retrive an input as a tensor, converting via CreateConstantLayer if it
+    // retrieve an input as a tensor, converting via CreateConstantLayer if it
     // was originally a weight. We will want a caching mechanism to prevent many
     // duplicate constants from being created.
     if (!inputs_is_weight[i].second && inputs.at(i).is_weights()) {
@@ -4670,7 +4782,7 @@ Status ConvertResize(OpConverterParams* params) {
   // return after validation if only validation is requested.
   if (params->validation_only) return Status::OK();
 
-  // Tranpose tensor from NHWC to NCHW format.
+  // Transpose tensor from NHWC to NCHW format.
   TF_RETURN_IF_ERROR(
       params->converter->TransposeTensor(tensor, {0, 3, 1, 2}, &tensor));
 
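Side note on the corrected comment, not part of the diff: the permutation {0, 3, 1, 2} passed to TransposeTensor maps NHWC to NCHW, with output dimension d taking input dimension perm[d]. A tiny sketch with a hypothetical image-batch shape:

#include <array>
#include <cstdio>

int main() {
  // Hypothetical NHWC shape and the permutation used by TransposeTensor above.
  std::array<int, 4> nhwc = {8, 224, 224, 3};
  std::array<int, 4> perm = {0, 3, 1, 2};
  std::array<int, 4> nchw;
  for (int d = 0; d < 4; ++d) nchw[d] = nhwc[perm[d]];
  std::printf("%d %d %d %d\n", nchw[0], nchw[1], nchw[2], nchw[3]);
  // Prints "8 3 224 224": batch, channels, height, width.
  return 0;
}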