Fix the op quant trait of tfl.rsqrt and the nudging for the calibration data

PiperOrigin-RevId: 348884204
Change-Id: Iabac61239cf394fa18d491e0da1fb54d17c3b23a
Feng Liu 2020-12-23 20:59:14 -08:00 committed by TensorFlower Gardener
parent 7494180256
commit 05303d0dce
3 changed files with 38 additions and 7 deletions
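
The core of the change: before calibration statistics are converted into a quantized type, every [rmin, rmax] range is widened to include 0.0, so that the real value 0.0 always falls inside the range and maps onto an exact integer zero point, avoiding the clamping the in-code comment below warns about. A minimal standalone sketch of that arithmetic for signed 8-bit (the qmin/qmax values and the round-to-nearest zero point are illustrative assumptions, not the MLIR quant library implementation):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
      double rmin = 0.1, rmax = 1.0;  // calibration stats that exclude 0.0
      // Nudge the range so it covers 0.0, as the patch below does.
      rmin = std::min(rmin, 0.0);
      rmax = std::max(rmax, 0.0);
      const int qmin = -128, qmax = 127;  // assumed signed 8-bit storage range
      const double scale = (rmax - rmin) / (qmax - qmin);
      const int zero_point = static_cast<int>(std::round(qmin - rmin / scale));
      // Prints scale = 0.0039215686... (1/255) and zero_point = -128, the values
      // checked by the new prepareStatisticsNudge test at the end of this diff.
      std::printf("scale = %.19f, zero_point = %d\n", scale, zero_point);
      return 0;
    }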


@@ -2649,8 +2649,7 @@ slice `i`, with the first `seq_lengths[i]` slices along dimension
 def TFL_RsqrtOp: TFL_Op<"rsqrt", [NoSideEffect,
                                   TFL_SameFirstOperandAndFirstResultElementType,
-                                  SameOperandsAndResultShape,
-                                  NoQuantizableResult]> {
+                                  SameOperandsAndResultShape]> {
   let summary = "Reciprocal of square root operator";
   let description = [{


@@ -104,11 +104,16 @@ struct ConvertStatsToQDQs : public OpRewritePattern<quant::StatisticsOp> {
       if (!stats) return failure();
       for (auto it = stats.begin(), e = stats.end(); it != e; ++it) {
-        double min = FloatAttr::getValueAsDouble(*it++);
-        double max = FloatAttr::getValueAsDouble(*it);
-        TensorRangeSanityCheck(op, min, max);
-        mins.push_back(min);
-        maxs.push_back(max);
+        double rmin = FloatAttr::getValueAsDouble(*it++);
+        double rmax = FloatAttr::getValueAsDouble(*it);
+        // The default nudging implementation of mlir quant library might cause
+        // clamping during inference if the calibration range isn't wide enough.
+        // So here we adjust the range to include 0.0.
+        rmin = std::min(rmin, 0.0);
+        rmax = std::max(rmax, 0.0);
+        TensorRangeSanityCheck(op, rmin, rmax);
+        mins.push_back(rmin);
+        maxs.push_back(rmax);
       }
       quant_type =
           quant::fakeQuantAttrsToType(op.getLoc(), num_bits, *op.axis(), mins,
@@ -116,6 +121,11 @@ struct ConvertStatsToQDQs : public OpRewritePattern<quant::StatisticsOp> {
     } else if (auto stats = op.layerStats().dyn_cast<DenseFPElementsAttr>()) {
       double rmin = FloatAttr::getValueAsDouble(stats.getValue<APFloat>({0}));
       double rmax = FloatAttr::getValueAsDouble(stats.getValue<APFloat>({1}));
+      // The default nudging implementation of mlir quant library might cause
+      // clamping during inference if the calibration range isn't wide enough.
+      // So here we adjust the range to include 0.0.
+      rmin = std::min(rmin, 0.0);
+      rmax = std::max(rmax, 0.0);
       TensorRangeSanityCheck(op, rmin, rmax);
       quant_type =
           quant::fakeQuantAttrsToType(op.getLoc(), num_bits, rmin, rmax,
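
A quick worked check of the per-tensor branch above, assuming the usual asymmetric int8 mapping with qmin = -128 and qmax = 127: layerStats of [0.1, 1.0] are widened to [0.0, 1.0], giving scale = (1.0 - 0.0) / 255 ≈ 0.0039215686274509803 and zero point = qmin - rmin / scale = -128. These are exactly the parameters the new prepareStatisticsNudge test below checks for %[[q1]].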


@@ -56,6 +56,28 @@ func @prepareStatistics(%arg0: tensor<8x4x3xf32>) -> tensor<8x4x3xf32> {
 // CHECK: return %[[dq2]]
 }
 
+// CHECK-LABEL: prepareStatisticsNudge
+func @prepareStatisticsNudge(%arg0: tensor<8x4x3xf32>) -> tensor<8x4x3xf32> {
+  %0 = "quant.stats"(%arg0) {
+    layerStats = dense<[0.1, 1.0]> : tensor<2xf32>
+  } : (tensor<8x4x3xf32>) -> tensor<8x4x3xf32>
+  %1 = "quant.stats"(%0) {
+    layerStats = dense<[0.1, 1.0]> : tensor<2xf32>,
+    axisStats = dense<[
+      [-1.0, 1.0],
+      [-8.0, -1.0],
+      [-0.5, 0.5]
+    ]> : tensor<3x2xf32>, axis = 2 : i64
+  } : (tensor<8x4x3xf32>) -> tensor<8x4x3xf32>
+  return %1 : tensor<8x4x3xf32>
+
+// CHECK: %[[q1:.*]] = "tfl.quantize"(%arg0) {qtype = tensor<8x4x3x!quant.uniform<i8:f32, 0.0039215686274509803:-128>>}
+// CHECK: %[[dq1:.*]] = "tfl.dequantize"(%[[q1]])
+// CHECK: %[[q2:.*]] = "tfl.quantize"(%[[dq1]]) {qtype = tensor<8x4x3x!quant.uniform<i8:f32:2, {0.0078431372549019607:-1,0.031372549019607843:127,0.0039215686274509803:-1}>>}
+// CHECK: %[[dq2:.*]] = "tfl.dequantize"(%[[q2]])
+// CHECK: return %[[dq2]]
+}
+
 // CHECK-LABEL: preparePrelu
 func @preparePrelu(%arg0: tensor<1x10x10x3xf32>) -> tensor<1x10x10x3xf32> {
   %cst = "tfl.pseudo_const"() {value = dense<[[[1.66394591, 3.61694336, 2.0382936]]]> : tensor<1x1x3xf32>} : () -> tensor<1x1x3xf32>
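
For the per-axis stats in prepareStatisticsNudge above, the same arithmetic (again assuming qmin = -128 and qmax = 127 per channel) gives: [-1.0, 1.0] -> scale 2/255 ≈ 0.0078431372549019607, zero point ≈ -1; [-8.0, -1.0], widened to [-8.0, 0.0], -> scale 8/255 ≈ 0.031372549019607843, zero point 127; [-0.5, 0.5] -> scale 1/255 ≈ 0.0039215686274509803, zero point ≈ -1. These match the per-channel quantized type checked for %[[q2]].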