diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td
index 53a50863e92..89c0e3520b7 100644
--- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td
+++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td
@@ -2649,8 +2649,7 @@ slice `i`, with the first `seq_lengths[i]` slices along dimension
 
 def TFL_RsqrtOp: TFL_Op<"rsqrt", [NoSideEffect,
                                   TFL_SameFirstOperandAndFirstResultElementType,
-                                  SameOperandsAndResultShape,
-                                  NoQuantizableResult]> {
+                                  SameOperandsAndResultShape]> {
   let summary = "Reciprocal of square root operator";
 
   let description = [{
diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h
index 0ee01b5ad45..48b0db6613a 100644
--- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h
+++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h
@@ -104,11 +104,16 @@ struct ConvertStatsToQDQs : public OpRewritePattern<quant::StatisticsOp> {
       if (!stats) return failure();
 
       for (auto it = stats.begin(), e = stats.end(); it != e; ++it) {
-        double min = FloatAttr::getValueAsDouble(*it++);
-        double max = FloatAttr::getValueAsDouble(*it);
-        TensorRangeSanityCheck(op, min, max);
-        mins.push_back(min);
-        maxs.push_back(max);
+        double rmin = FloatAttr::getValueAsDouble(*it++);
+        double rmax = FloatAttr::getValueAsDouble(*it);
+        // The default nudging implementation of the MLIR quant library might
+        // cause clamping during inference if the calibration range isn't wide
+        // enough. So here we adjust the range to include 0.0.
+        rmin = std::min(rmin, 0.0);
+        rmax = std::max(rmax, 0.0);
+        TensorRangeSanityCheck(op, rmin, rmax);
+        mins.push_back(rmin);
+        maxs.push_back(rmax);
       }
       quant_type =
           quant::fakeQuantAttrsToType(op.getLoc(), num_bits, *op.axis(), mins,
@@ -116,6 +121,11 @@ struct ConvertStatsToQDQs : public OpRewritePattern<quant::StatisticsOp> {
     } else if (auto stats = op.layerStats().dyn_cast<DenseFPElementsAttr>()) {
       double rmin = FloatAttr::getValueAsDouble(stats.getValue<APFloat>({0}));
       double rmax = FloatAttr::getValueAsDouble(stats.getValue<APFloat>({1}));
+      // The default nudging implementation of the MLIR quant library might
+      // cause clamping during inference if the calibration range isn't wide
+      // enough. So here we adjust the range to include 0.0.
+      rmin = std::min(rmin, 0.0);
+      rmax = std::max(rmax, 0.0);
       TensorRangeSanityCheck(op, rmin, rmax);
       quant_type =
           quant::fakeQuantAttrsToType(op.getLoc(), num_bits, rmin, rmax,
diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir
index e08eb5308d1..6288bd1213c 100644
--- a/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir
+++ b/tensorflow/compiler/mlir/lite/tests/prepare-quantize-signed.mlir
@@ -56,6 +56,28 @@ func @prepareStatistics(%arg0: tensor<8x4x3xf32>) -> tensor<8x4x3xf32> {
 // CHECK: return %[[dq2]]
 }
 
+// CHECK-LABEL: prepareStatisticsNudge
+func @prepareStatisticsNudge(%arg0: tensor<8x4x3xf32>) -> tensor<8x4x3xf32> {
+  %0 = "quant.stats"(%arg0) {
+    layerStats = dense<[0.1, 1.0]> : tensor<2xf32>
+  } : (tensor<8x4x3xf32>) -> tensor<8x4x3xf32>
+  %1 = "quant.stats"(%0) {
+    layerStats = dense<[0.1, 1.0]> : tensor<2xf32>,
+    axisStats = dense<[
+      [-1.0, 1.0],
+      [-8.0, -1.0],
+      [-0.5, 0.5]
+    ]> : tensor<3x2xf32>, axis = 2 : i64
+  } : (tensor<8x4x3xf32>) -> tensor<8x4x3xf32>
+  return %1 : tensor<8x4x3xf32>
+
+// CHECK: %[[q1:.*]] = "tfl.quantize"(%arg0) {qtype = tensor<8x4x3x!quant.uniform<i8:f32, 0.0039215686274509803:-128>>}
+// CHECK: %[[dq1:.*]] = "tfl.dequantize"(%[[q1]])
+// CHECK: %[[q2:.*]] = "tfl.quantize"(%[[dq1]]) {qtype = tensor<8x4x3x!quant.uniform<i8:f32:2, {0.0078431372549019607:-1,0.031372549019607843:127,0.0039215686274509803:-1}>>}
+// CHECK: %[[dq2:.*]] = "tfl.dequantize"(%[[q2]])
+// CHECK: return %[[dq2]]
+}
+
 // CHECK-LABEL: preparePrelu
 func @preparePrelu(%arg0: tensor<1x10x10x3xf32>) -> tensor<1x10x10x3xf32> {
   %cst = "tfl.pseudo_const"() {value = dense<[[[1.66394591, 3.61694336, 2.0382936]]]> : tensor<1x1x3xf32>} : () -> tensor<1x1x3xf32>
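
Why adjusting the range to include 0.0 avoids clamping: the quantized zero point must be an integer inside [qmin, qmax]. With a calibration range such as [0.1, 1.0], the affine zero point qmin - rmin/scale falls below qmin, the nudging clamps it to qmin, and the representable range shifts to [0.0, 0.9], so calibrated values in (0.9, 1.0] get clamped at inference time. Widening the range to [0.0, 1.0] first keeps the whole calibrated interval representable. Below is a minimal standalone C++ sketch of this FakeQuant-style nudging arithmetic; it is not the actual MLIR quant library code, and the Nudge/Nudged names and hard-coded i8 bounds are illustrative assumptions.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Result of FakeQuant-style range nudging (illustrative, not MLIR's struct).
struct Nudged {
  double scale;
  int zero_point;
  double representable_min;  // real value that qmin maps back to
  double representable_max;  // real value that qmax maps back to
};

// Hypothetical helper mirroring the nudging arithmetic described above,
// assuming 8-bit signed storage with qmin = -128, qmax = 127.
Nudged Nudge(double rmin, double rmax, int qmin = -128, int qmax = 127) {
  double scale = (rmax - rmin) / static_cast<double>(qmax - qmin);
  double zp_from_min = qmin - rmin / scale;
  int zp;
  if (zp_from_min < qmin) {
    zp = qmin;  // rmin > 0: zero point would land below qmin, clamp low
  } else if (zp_from_min > qmax) {
    zp = qmax;  // rmax < 0: zero point would land above qmax, clamp high
  } else {
    zp = static_cast<int>(std::round(zp_from_min));
  }
  return {scale, zp, (qmin - zp) * scale, (qmax - zp) * scale};
}

int main() {
  // Raw calibration range [0.1, 1.0]: zero point clamps to -128 and the
  // representable range becomes [0.0, 0.9] -- values in (0.9, 1.0] clamp.
  Nudged raw = Nudge(0.1, 1.0);
  // With the patch's adjustment, the range is widened to include 0.0 first,
  // so the representable range is the full [0.0, 1.0].
  Nudged fixed = Nudge(std::min(0.1, 0.0), std::max(1.0, 0.0));
  std::printf("raw:   scale=%.10f zp=%d range=[%f, %f]\n", raw.scale,
              raw.zero_point, raw.representable_min, raw.representable_max);
  std::printf("fixed: scale=%.10f zp=%d range=[%f, %f]\n", fixed.scale,
              fixed.zero_point, fixed.representable_min, fixed.representable_max);
  return 0;
}

The same arithmetic accounts for the expected types in the new prepareStatisticsNudge test: the per-layer stats [0.1, 1.0] widen to [0.0, 1.0], giving scale 1/255 (0.0039215686274509803) with zero point -128, and the per-axis range [-8.0, -1.0] widens to [-8.0, 0.0], giving scale 8/255 (0.031372549019607843) with zero point 127.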