When in int8 no-calibration mode, set layers with no range info to fp16, so
TensorRT doesn't fail to compile the whole segment.

PiperOrigin-RevId: 255451092
commit 32002bf4ed (parent 160e51b6e8)
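Reviewer note, not part of the diff: in TensorRT INT8 mode, every tensor that takes part in int8 math needs a dynamic range, either from calibration or from explicit setDynamicRange() calls; a missing range previously made engine building fail for the whole segment. The change below instead falls back to fp16 for the affected layers. A minimal standalone sketch of that fallback, assuming a fully built INetworkDefinition; the function name and the `ranged` set are illustrative stand-ins for TF-TRT's quantization_ranges_ bookkeeping, not code from this commit:

#include <set>
#include "NvInfer.h"

// Force layers whose outputs lack a quantization range out of int8.
void DemoteRangelessLayersToFp16(nvinfer1::INetworkDefinition* network,
                                 const std::set<nvinfer1::ITensor*>& ranged) {
  for (int i = 0; i < network->getNbLayers(); i++) {
    nvinfer1::ILayer* layer = network->getLayer(i);
    bool missing_range = false;
    for (int j = 0; j < layer->getNbOutputs(); j++) {
      if (ranged.count(layer->getOutput(j)) == 0) missing_range = true;
    }
    // Shuffle layers don't support setting precision (see the diff below).
    if (missing_range && layer->getType() != nvinfer1::LayerType::kSHUFFLE) {
      // Run this layer in fp16 so the builder doesn't need int8 ranges for it.
      layer->setPrecision(nvinfer1::DataType::kHALF);
    }
  }
}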
@@ -1425,6 +1425,32 @@ void Converter::ProvideQuantizationRange(nvinfer1::ITensor* tensor,
   quantization_ranges_[tensor] = symmetric_range;
 }
 
+namespace {
+
+bool IsConvolution(const nvinfer1::ILayer* layer) {
+  return layer->getType() == nvinfer1::LayerType::kCONVOLUTION;
+}
+
+bool IsScale(const nvinfer1::ILayer* layer) {
+  return layer->getType() == nvinfer1::LayerType::kSCALE;
+}
+
+bool IsClipOrRelu(const nvinfer1::ILayer* layer) {
+  if (layer->getType() != nvinfer1::LayerType::kACTIVATION) {
+    return false;
+  }
+  auto activation_type = static_cast<const nvinfer1::IActivationLayer*>(layer)
+                             ->getActivationType();
+#if IS_TRT_VERSION_GE(5, 1, 2, 0)
+  return activation_type == nvinfer1::ActivationType::kRELU ||
+         activation_type == nvinfer1::ActivationType::kCLIP;
+#else
+  return activation_type == nvinfer1::ActivationType::kRELU;
+#endif
+}
+
+}  // namespace
+
 void Converter::MaybeApplyQuantizationRanges() {
   if (precision_mode() != TrtPrecisionMode::INT8) return;
 
@@ -1442,34 +1468,120 @@ void Converter::MaybeApplyQuantizationRanges() {
   }
 #endif
 
-  // Warn user about tensors that are missing ranges. If TRT fuses some layers
-  // then these tensors may not actually be required, which is why this is
-  // just a warning. If we are still missing ranges even after fusion,
-  // Builder::buildCudaEngine() will return nullptr and we will catch the
-  // error at that point.
-  if (!use_calibration()) {
-    // Get all tensors from network
-    std::set<nvinfer1::ITensor*> all_tensors;
-    for (int i = 0; i < this->network()->getNbLayers(); i++) {
-      nvinfer1::ILayer* layer = this->network()->getLayer(i);
-      for (int j = 0; j < layer->getNbInputs(); j++) {
-        all_tensors.insert(layer->getInput(j));
-      }
-      for (int j = 0; j < layer->getNbOutputs(); j++) {
-        all_tensors.insert(layer->getOutput(j));
-      }
-    }
-    // Find tensors with no ranges
-    for (auto tensor : all_tensors) {
-      if (!quantization_ranges_.count(tensor)) {
-        // Note: there may be some warnings for "(Unnamed ITensor* N)". These
-        // are tensors which are created internally by TF-TRT. The ranges for
-        // these unnamed ITensors are always inferred from user provided ranges,
-        // thus there will also be a warning for the range(s) the user missed.
-        LOG(WARNING) << "Quantization range was not found for "
-                     << tensor->getName() << ". "
-                     << "This is okay if TensorRT does not need the range "
-                     << "(e.g. due to node fusion).";
-      }
-    }
-  }
+  if (use_calibration()) return;
+  // Attempt to find tensors that are missing ranges, and set the corresponding
+  // layer's precision to FP16 to avoid Builder::buildCudaEngine() failing.
+  // TensorRT doesn't need ranges for intermediate tensors when layers are fused
+  // so find fused layers first.
+  // Get all tensors from network and deduce fused ops.
+  std::map<nvinfer1::ILayer*, std::vector<nvinfer1::ILayer*>> layer_consumers;
+  std::map<nvinfer1::ITensor*, nvinfer1::ILayer*> tensor_layer;
+  std::set<nvinfer1::ITensor*> all_tensors;
+  for (int i = 0; i < this->network()->getNbLayers(); i++) {
+    nvinfer1::ILayer* layer = this->network()->getLayer(i);
+    layer_consumers[layer] = {};
+    for (int j = 0; j < layer->getNbInputs(); j++) {
+      all_tensors.insert(layer->getInput(j));
+    }
+    for (int j = 0; j < layer->getNbOutputs(); j++) {
+      tensor_layer[layer->getOutput(j)] = layer;
+      all_tensors.insert(layer->getOutput(j));
+    }
+  }
+  for (int i = 0; i < this->network()->getNbLayers(); i++) {
+    nvinfer1::ILayer* layer = this->network()->getLayer(i);
+    layer_consumers[layer] = {};
+    for (int j = 0; j < layer->getNbInputs(); j++) {
+      nvinfer1::ITensor* input_tensor = layer->getInput(j);
+      auto input_layer = tensor_layer.find(input_tensor);
+      if (input_layer != tensor_layer.end()) {
+        auto consumed_layer = layer_consumers.find(input_layer->second);
+        if (consumed_layer != layer_consumers.end()) {
+          consumed_layer->second.push_back(layer);
+        }
+      }
+      all_tensors.insert(input_tensor);
+    }
+  }
+  // Identify fused tensors.
+  // Conv+BiasAdd+Activation(Clip or Relu), Conv+BiasAdd,
+  // Conv+Activation(Clip or Relu) are fused.
+  std::set<nvinfer1::ITensor*> fused_tensors;
+  typedef std::function<bool(const nvinfer1::ILayer*)> matcher;
+  const std::vector<std::pair<string, std::vector<matcher>>> fused_patterns = {
+      {"Fused Conv+Bias+Activation",
+       {
+           IsConvolution,
+           IsScale,
+           IsClipOrRelu,
+       }},
+      {"Fused Conv+Bias",
+       {
+           IsConvolution,
+           IsScale,
+       }},
+      {"Fused Conv+Activation",
+       {
+           IsConvolution,
+           IsClipOrRelu,
+       }},
+  };
+  for (int i = 0; i < this->network()->getNbLayers(); i++) {
+    for (const auto& pattern : fused_patterns) {
+      size_t last_matcher = pattern.second.size() - 1;
+      nvinfer1::ILayer* layer = this->network()->getLayer(i);
+      // We should skip this layer if its outputs are already marked as fused,
+      // but all the current patterns start with a convolution and are ordered
+      // in decreasing pattern length, so that is not necessary (yet).
+      std::vector<nvinfer1::ILayer*> fused_candidates;
+      for (size_t index = 0; index <= last_matcher; ++index) {
+        if ((!pattern.second[index](layer)) ||
+            (index < last_matcher && layer_consumers[layer].size() != 1)) {
+          fused_candidates.clear();
+          break;
+        }
+        if (index < last_matcher) {
+          fused_candidates.push_back(layer);
+          layer = layer_consumers[layer].front();
+        }
+      }
+      if (!fused_candidates.empty()) {
+        VLOG(1) << pattern.first;
+        for (const auto& fused_layer : fused_candidates) {
+          for (int i = 0; i < fused_layer->getNbOutputs(); i++) {
+            VLOG(1) << "  Fused output tensor:"
+                    << fused_layer->getOutput(i)->getName();
+            fused_tensors.insert(fused_layer->getOutput(i));
+          }
+        }
+        break;  // Don't try other patterns on this layer.
+      }
+    }
+  }
+  // Find tensors with no ranges that are not fused and force their layers to
+  // not be quantized.
+  for (auto tensor : all_tensors) {
+    if (!quantization_ranges_.count(tensor) &&
+        fused_tensors.find(tensor) == fused_tensors.end()) {
+      // Note: there may be some warnings for "(Unnamed ITensor* N)". These
+      // are tensors which are created internally by TF-TRT. The ranges for
+      // these unnamed ITensors are always inferred from user provided ranges,
+      // thus there will also be a warning for the range(s) the user missed.
+      LOG(WARNING) << "Quantization range was not found for "
+                   << tensor->getName() << ". "
+                   << "Setting invalid quantization range.";
+      // Set the range to something unusable so the engine will fail if it
+      // tries to actually use the tensor's range.
+      tensor->setDynamicRange(0, 0);
+      auto layer = tensor_layer.find(tensor);
+      // If the tensor is the output of a layer, set the layer's precision
+      // to fp16 so that it isn't quantized.
+      // Shuffle doesn't support setting precision.
+      if (layer != tensor_layer.end() &&
+          layer->second->getType() != nvinfer1::LayerType::kSHUFFLE) {
+        VLOG(1) << "And setting layer " << layer->second->getName()
                << " precision to fp16.";
+        layer->second->setPrecision(nvinfer1::DataType::kHALF);
+      }
+    }
+  }
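Reviewer note, illustration only: the fusion walk above requires each non-final layer in a candidate chain to have exactly one consumer, and it deliberately leaves the final layer out of fused_candidates, because the fused op's own output tensor still needs a quantization range. A self-contained sketch of the same loop over simplified stand-in types (not the TensorRT API), assuming a hypothetical Conv -> Scale -> Relu chain:

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Simplified stand-in for nvinfer1::ILayer; illustration only.
struct Layer {
  std::string kind;
};

using Matcher = std::function<bool(const Layer*)>;

int main() {
  // Hypothetical chain conv -> scale -> relu, each stage with one consumer.
  Layer conv{"conv"}, scale{"scale"}, relu{"relu"};
  std::map<Layer*, std::vector<Layer*>> layer_consumers = {
      {&conv, {&scale}}, {&scale, {&relu}}, {&relu, {}}};

  // Same shape as the "Fused Conv+Bias+Activation" pattern above.
  std::vector<Matcher> pattern = {
      [](const Layer* l) { return l->kind == "conv"; },
      [](const Layer* l) { return l->kind == "scale"; },
      [](const Layer* l) { return l->kind == "relu"; }};

  // The walk from the diff: match stage by stage, follow the single consumer,
  // and collect every layer except the last one.
  size_t last_matcher = pattern.size() - 1;
  Layer* layer = &conv;
  std::vector<Layer*> fused_candidates;
  for (size_t index = 0; index <= last_matcher; ++index) {
    if (!pattern[index](layer) ||
        (index < last_matcher && layer_consumers[layer].size() != 1)) {
      fused_candidates.clear();
      break;
    }
    if (index < last_matcher) {
      fused_candidates.push_back(layer);
      layer = layer_consumers[layer].front();
    }
  }
  for (Layer* l : fused_candidates) {
    std::cout << "range not required for output of: " << l->kind << "\n";
  }
  // Prints conv and scale; relu is intentionally not collected.
  return 0;
}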
@@ -1570,7 +1682,7 @@ Status CheckInputsWeights(
                                    " must be a constant, at ", node_def.name());
     }
     // TODO(tmorris): Remove this check and provide a method to automatically
-    // retrive an input as a tensor, converting via CreateConstantLayer if it
+    // retrieve an input as a tensor, converting via CreateConstantLayer if it
     // was originally a weight. We will want a caching mechanism to prevent many
     // duplicate constants from being created.
     if (!inputs_is_weight[i].second && inputs.at(i).is_weights()) {
@@ -4670,7 +4782,7 @@ Status ConvertResize(OpConverterParams* params) {
   // return after validation if only validation is requested.
   if (params->validation_only) return Status::OK();
 
-  // Tranpose tensor from NHWC to NCHW format.
+  // Transpose tensor from NHWC to NCHW format.
   TF_RETURN_IF_ERROR(
       params->converter->TransposeTensor(tensor, {0, 3, 1, 2}, &tensor));
 
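Side note on the corrected comment, not part of the diff: the permutation {0, 3, 1, 2} passed to TransposeTensor maps NHWC to NCHW, with output dimension d taking input dimension perm[d]. A tiny sketch with a hypothetical image-batch shape:

#include <array>
#include <cstdio>

int main() {
  // Hypothetical NHWC shape and the permutation used by TransposeTensor above.
  std::array<int, 4> nhwc = {8, 224, 224, 3};
  std::array<int, 4> perm = {0, 3, 1, 2};
  std::array<int, 4> nchw;
  for (int d = 0; d < 4; ++d) nchw[d] = nhwc[perm[d]];
  std::printf("%d %d %d %d\n", nchw[0], nchw[1], nchw[2], nchw[3]);
  // Prints "8 3 224 224": batch, channels, height, width.
  return 0;
}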