Ensure that all TFLite int16x8 operators check that the zero-point is null
parent ef9971b6d7
commit 3e76163839
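Across the kernels touched below, the Prepare() changes all follow one pattern: when a tensor is quantized to int16, its quantization must be symmetric, i.e. the zero-point must be 0. A minimal sketch of that pattern, assuming the usual TFLite kernel headers; the wrapper function itself is illustrative and not part of the commit, which inlines these checks into each kernel's Prepare():

// Illustrative helper only; the commit adds the equivalent checks directly
// inside each kernel's Prepare() function.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"

TfLiteStatus EnsureSymmetricInt16(TfLiteContext* context,
                                  const TfLiteTensor* input,
                                  const TfLiteTensor* output) {
  if (input->type == kTfLiteInt16) {
    // int16x8 kernels assume symmetric quantization: zero_point == 0.
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
  }
  if (output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }
  return kTfLiteOk;
}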
@@ -357,6 +357,12 @@ TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
&data->output_shift_identity);
}

if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}

return context->ResizeTensor(context, output,
TfLiteIntArrayCopy(input->dims));
}

@@ -585,6 +591,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
}

if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);

data->params.exp_lut = data->exp_lut;
@@ -552,7 +552,9 @@ TEST(QuantizedActivationsOpTest, LeakyReluUint8) {
template <TensorType tensor_type, typename integer_dtype>
void QuantizedActivationsOpTestLeakyRelu() {
const float kMin = -1;
const float kMax = 127.f / 128.f;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);

QuantizedActivationsOpModel m(
/*input=*/{tensor_type, {5, 5}, 5 * kMin, 5 * kMax}, 0.1);
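The updated tests replace hard-coded ranges with kMin = -1 and kMax = max / (max + 1) so that the derived quantization parameters come out exactly symmetric, with a zero-point of 0. A small standalone check of that arithmetic for the int16 case (hypothetical snippet, not part of the test file; it uses the usual asymmetric parameter formulas):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  const double qmin = std::numeric_limits<int16_t>::min();  // -32768
  const double qmax = std::numeric_limits<int16_t>::max();  //  32767
  const double rmin = -1.0;                // kMin
  const double rmax = 32767.0 / 32768.0;   // kMax = max / (max + 1)
  const double scale = (rmax - rmin) / (qmax - qmin);      // exactly 1 / 32768
  const double zero_point = std::round(qmin - rmin / scale);
  std::printf("scale=%g zero_point=%g\n", scale, zero_point);  // zero_point = 0
  return 0;
}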
@@ -1219,9 +1221,12 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt8) {
// Test quantized softmax with int16 input and output. With the same input as in
// QuantizedActivationsOpTest.Softmax2D, the dequantized output is identical.
TEST(QuantizedActivationsOpTest, Softmax1DInt16) {
QuantizedActivationsOpModel m(1,
/*input=*/{TensorType_INT16, {3}, -3, 3},
/*output_type-*/ TensorType_INT16);
const float kMin = -1;
const float kMax = 32767.f / 32768.f;
QuantizedActivationsOpModel m(
1,
/*input=*/{TensorType_INT16, {3}, 3 * kMin, 3 * kMax},
/*output_type-*/ TensorType_INT16);
m.SetInput<int16_t>({1, 2, 3});
m.Invoke();
EXPECT_THAT(

@@ -1231,9 +1236,11 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt16) {
}

TEST(QuantizedActivationsOpTest, Softmax1DInt16ZeroElement) {
QuantizedActivationsOpModel m(0.1,
/*input=*/{TensorType_INT16, {1}, -1, 1},
TensorType_INT16);
const float kMin = -1;
const float kMax = 32767.f / 32768.f;
QuantizedActivationsOpModel m(
0.1,
/*input=*/{TensorType_INT16, {1}, 1 * kMin, 1 * kMax}, TensorType_INT16);
m.SetInput<int16_t>({0});
m.Invoke();
EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),

@@ -1241,9 +1248,12 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt16ZeroElement) {
}

TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
QuantizedActivationsOpModel m(0.1,
/*input=*/{TensorType_INT16, {2, 4}, -10, 10},
TensorType_INT16);
const float kMin = -1;
const float kMax = 32767.f / 32768.f;
QuantizedActivationsOpModel m(
0.1,
/*input=*/{TensorType_INT16, {2, 4}, 10 * kMin, 10 * kMax},
TensorType_INT16);
m.SetInput<int16_t>({
0, -6, 2, 4, //
3, -2, 10, 1, //

@@ -1258,9 +1268,10 @@ TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
kQuantizedToleranceInt16)));

// Same input, but a different shape.
QuantizedActivationsOpModel m2(0.1,
/*input=*/{TensorType_INT16, {4, 2}, -10, 10},
TensorType_INT16);
QuantizedActivationsOpModel m2(
0.1,
/*input=*/{TensorType_INT16, {4, 2}, 10 * kMin, 10 * kMax},
TensorType_INT16);
m2.SetInput<int16_t>({
0, -6, //
2, 4, //

@@ -1280,9 +1291,12 @@ TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
}

TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
const float kMin = -1;
const float kMax = 32767.f / 32768.f;
QuantizedActivationsOpModel m(
1,
/*input=*/{TensorType_INT16, {1, 2, 4}, -10, 10}, TensorType_INT16);
/*input=*/{TensorType_INT16, {1, 2, 4}, 10 * kMin, 10 * kMax},
TensorType_INT16);
m.SetInput<int16_t>({
0, -6, 2, 4, // depth = 0
3, -2, 10, 1, // depth = 1

@@ -1299,7 +1313,8 @@ TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
// Same input, but a different shape.
QuantizedActivationsOpModel m2(
1,
/*input=*/{TensorType_INT16, {4, 1, 2}, -10, 10}, TensorType_INT16);
/*input=*/{TensorType_INT16, {4, 1, 2}, 10 * kMin, 10 * kMax},
TensorType_INT16);
m2.SetInput<int16_t>({
0, -6, //
2, 4, //

@@ -1321,9 +1336,12 @@ TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
// Test quantized softmax with int16 input and output. With the same input as in
// QuantizedActivationsOpTest.Softmax4D, the dequantized output is identical.
TEST(QuantizedActivationsOpTest, Softmax4DInt16) {
const float kMin = -1;
const float kMax = 32767.f / 32768.f;
QuantizedActivationsOpModel m(
0.1,
/*input=*/{TensorType_INT16, {1, 2, 1, 4}, -10, 10}, TensorType_INT16);
/*input=*/{TensorType_INT16, {1, 2, 1, 4}, 10 * kMin, 10 * kMax},
TensorType_INT16);
m.SetInput<int16_t>({
0, -6, 2, 4, // depth = 0
3, -2, 10, 1, // depth = 1

@@ -1340,7 +1358,8 @@ TEST(QuantizedActivationsOpTest, Softmax4DInt16) {
// Same input, but a different shape.
QuantizedActivationsOpModel m2(
0.1,
/*input=*/{TensorType_INT16, {4, 1, 1, 2}, -10, 10}, TensorType_INT16);
/*input=*/{TensorType_INT16, {4, 1, 1, 2}, 10 * kMin, 10 * kMax},
TensorType_INT16);
m2.SetInput<int16_t>({
0, -6, //
2, 4, //
@@ -95,6 +95,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
}
}

if (input_type == kTfLiteInt16) {
// Make sure that all Int16 inputs have a null zero-point.
for (int i = 0; i < node->inputs->size; ++i) {
const TfLiteTensor* t = GetInput(context, node, i);
TF_LITE_ENSURE_EQ(context, t->params.zero_point, 0);
}
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}

return context->ResizeTensor(context, output, output_size);
}
@@ -287,8 +287,13 @@ TYPED_TEST_CASE(ConcatenationOpTestTyped, TestTypes);
TYPED_TEST(ConcatenationOpTestTyped, FourInputsQuantizedInt8) {
using TestType = typename TestFixture::TestType;

const float kMin = -1;
const float kMax =
std::numeric_limits<TestType>::max() /
static_cast<float>(std::numeric_limits<TestType>::max() + 1);

QuantizedConcatenationOpModel m0(
{TestFixture::tensor_type, {2, 1, 2}, -12.7, 12.8},
{TestFixture::tensor_type, {2, 1, 2}, 12.8 * kMin, 12.8 * kMax},
/*axis=*/2,
/*num_inputs=*/4);

@@ -302,20 +307,6 @@ TYPED_TEST(ConcatenationOpTestTyped, FourInputsQuantizedInt8) {
1, 3, 1.1, 3.1, 1.2, 3.2, 1.3, 3.3, //
4, 7, 4.1, 7.1, 4.2, 7.2, 4.3, 7.3 //
})));

if (TestFixture::tensor_type == TensorType_INT8) {
EXPECT_THAT(m0.GetOutput<int8_t>(), ElementsAreArray({
9, 29, 10, 30, 11, 31, 12, 32, //
39, 69, 40, 70, 41, 71, 42, 72, //
}));
}

if (TestFixture::tensor_type == TensorType_INT16) {
EXPECT_THAT(m0.GetOutput<int16_t>(),
ElementsAreArray({2441, 7581, 2698, 7838, 2955, //
8095, 3212, 8352, 10151, 17861, //
10408, 18118, 10665, 18375, 10922, 18632}));
}
}

TEST(ConcatenationOpTest, FourInputsQuantizedMixedRange) {
@@ -333,6 +333,11 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context,
input_type == kTfLiteInt8 || input_type == kTfLiteInt16);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_type);

if (input_type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}

const TfLiteTensor* bias = nullptr;

// TODO(ahentz): At this point the optimized versions require 'bias'. We can

@@ -347,8 +352,6 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context,
} else if (input_type == kTfLiteInt16) {
TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, bias->type, input_type);
}
@@ -128,6 +128,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
filter->type == data_type || data_type == kTfLiteInt16);
}

if (data_type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}

// Filter in DepthwiseConv is expected to be [1, H, W, O].
TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 0), 1);

@@ -139,8 +144,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
} else if (data_type == kTfLiteInt16) {
TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, bias->type, data_type);
}
@@ -60,6 +60,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
op_context.input->type == kTfLiteInt16 ||
op_context.input->type == kTfLiteFloat16);

if (op_context.input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0);
}

op_context.output->type = kTfLiteFloat32;
// If the input tensor is constant, we can persist the dequantized value in
// the output tensor. Otherwise we run dequantize upon each eval.
@@ -108,8 +108,8 @@ TEST(DequantizeOpTest, Float16) {
}

TEST(DequantizeOpTest, Int16) {
DequantizeOpModel m(TensorType_INT16, {2, 5}, 0.5, -1, 4);
m.SetInput<int16_t>({-130, -127, -126, -125, -124, 123, 124, 125, 126, 130});
DequantizeOpModel m(TensorType_INT16, {2, 5}, 0.5, 0, 4);
m.SetInput<int16_t>({-129, -126, -125, -124, -123, 124, 125, 126, 127, 131});
m.Invoke();
EXPECT_THAT(m.GetOutput(),
ElementsAreArray(ArrayFloatNear(
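The raw values in the DequantizeOpTest.Int16 change above shift by one because dequantization computes real = scale * (q - zero_point), and the test's zero-point moves from -1 to 0 while the expected real outputs stay the same. A quick standalone check of that identity (illustrative snippet, not TFLite code):

#include <cstdio>

int main() {
  const float scale = 0.5f;
  // Old test: zero_point = -1, first element -130.
  // New test: zero_point =  0, first element -129.
  const float old_value = scale * (-130 - (-1));  // -64.5
  const float new_value = scale * (-129 - 0);     // -64.5
  std::printf("%g %g\n", old_value, new_value);
  return 0;
}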
@@ -133,6 +133,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
&data->output_shift);
}

if (input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
}
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
}

return context->ResizeTensor(context, output,
TfLiteIntArrayCopy(input->dims));
}
@@ -91,26 +91,27 @@ TEST(QuantizeOpTest, INT16) {
12700, 12800}));
}

// rescale factor is around 2
// Input scale 1.000000, output scale 0.500000, input zeropoint 0, output
// zeropoint 0
TEST(QuantizeOpTest, Int16Int16) {
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -16383, 16384},
{TensorType_INT16, {1, 1, 2, 5}, 0, 16384});
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 1.0, 0},
{TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0});

m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
m.Invoke();
EXPECT_THAT(m.GetOutput<int16_t>(),
ElementsAreArray({-32764, -32760, -32756, -32752, -32748, -32744,
-32740, -32736, -32732, -32728}));
ElementsAreArray({2, 4, 6, 8, 10, 12, 14, 16, 18, 20}));
}

// zero point is -1, scale is 0.5
// Input scale 0.500000, output scale 0.500000, input zeropoint 0, output
// zeropoint 0
TEST(QuantizeOpTest, Int16Int16SameScale) {
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -16384, 16384},
{TensorType_INT16, {1, 1, 2, 5}, -16384, 16384});
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0},
{TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0});
m.SetInputAndQuantize<int16_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 37767});
m.Invoke();
EXPECT_THAT(m.GetOutput<int16_t>(),
ElementsAreArray({-1, 1, 3, 5, 7, 9, 11, 13, 15, 32767}));
ElementsAreArray({0, 2, 4, 6, 8, 10, 12, 14, 16, 32767}));
}

// Input scale 0.500000, output scale 0.500000, input zeropoint -1, output

@@ -408,24 +409,24 @@ TEST(QuantizeOpTest, Uint8Int8SmallerScale) {
ElementsAreArray({1, 3, 5, 7, 9, 11, 13, 15, 17, 19}));
}

// Input scale 0.500000, output scale 0.500000, input zeropoint -1, output
// Input scale 0.500000, output scale 0.500000, input zeropoint 0, output
// zeropoint -1
TEST(QuantizeOpTest, Int16Int8SameScale) {
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -63.5, 64},
{TensorType_INT8, {1, 1, 2, 5}, -63.5, 64});
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0},
{TensorType_INT8, {1, 1, 2, 5}, 0, 0, 0.5, -1});

// Input will quantized to {1,3,5,7,9,11,13,15,17,19}.
// Input will quantized to {2,4,6,8,10,12,14,16,18,20}.
m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
m.Invoke();
EXPECT_THAT(m.GetOutput<int8_t>(),
ElementsAreArray({1, 3, 5, 7, 9, 11, 13, 15, 17, 19}));
}

// Input scale 0.500000, output scale 1.000000, input zeropoint -1, output
// Input scale 0.500000, output scale 1.000000, input zeropoint 0, output
// zeropoint -1
TEST(QuantizeOpTest, Int16Int8LargerScale) {
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -63.5, 64},
{TensorType_INT8, {1, 1, 2, 5}, -127, 128});
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0},
{TensorType_INT8, {1, 1, 2, 5}, 0, 0, 1.0, -1});

m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
m.Invoke();

@@ -433,11 +434,11 @@ TEST(QuantizeOpTest, Int16Int8LargerScale) {
ElementsAreArray({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
}

// Input scale 1.000000, output scale 0.500000, input zeropoint -1, output
// Input scale 1.000000, output scale 0.500000, input zeropoint 0, output
// zeropoint -1
TEST(QuantizeOpTest, Int16Int8SmallerScale) {
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -127, 128},
{TensorType_INT8, {1, 1, 2, 5}, -63.5, 64});
QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 1.0, 0},
{TensorType_INT8, {1, 1, 2, 5}, 0, 0, 0.5, -1});

m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
m.Invoke();

@@ -447,8 +448,8 @@ TEST(QuantizeOpTest, Int16Int8SmallerScale) {

// Same as previous test, except more data to hit the neon path.
TEST(QuantizeOpTest, Int16Int8SmallerScaleNeonPath) {
QuantizeOpModel m({TensorType_INT16, {1, 1, 4, 5}, -127, 128},
{TensorType_INT8, {1, 1, 4, 5}, -63.5, 64});
QuantizeOpModel m({TensorType_INT16, {1, 1, 4, 5}, 0, 0, 1.0, 0},
{TensorType_INT8, {1, 1, 4, 5}, 0, 0, 0.5, -1});

m.SetInputAndQuantize<int16_t>(
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1});
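With both zero-points forced to 0, the QuantizeOpTest.Int16Int16 expectation above follows from plain rescaling: q_out = round(q_in * input_scale / output_scale), so with scales 1.0 and 0.5 every quantized input is simply doubled. A standalone check of that arithmetic (illustrative snippet, not TFLite code):

#include <cmath>
#include <cstdio>

int main() {
  const float input_scale = 1.0f;   // as in the Int16Int16 test above
  const float output_scale = 0.5f;
  for (int real = 1; real <= 10; ++real) {
    const int q_in = static_cast<int>(std::round(real / input_scale));
    const int q_out =
        static_cast<int>(std::round(q_in * input_scale / output_scale));
    std::printf("%d -> %d\n", q_in, q_out);  // 1 -> 2, 2 -> 4, ..., 10 -> 20
  }
  return 0;
}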
@@ -254,6 +254,12 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
QuantizeMultiplier(real_multiplier, &data->multiplier, &exponent);
data->shift = exponent;
}

if (op_context.input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0);
}

TfLiteTensor* temp_sum = GetTemporary(context, node, /*index=*/2);
if (!IsConstantTensor(op_context.axis)) {
SetTensorToDynamic(temp_sum);
@@ -52,6 +52,24 @@ class BaseOpModel : public SingleOpModel {

int Input() { return input_; }

protected:
TensorData& SymmetricInt16Scaling(TensorData& tensor) {
// Symmetric range and null zero-point is required for INT16 tensors. As
// SingleOpModel::QuantizationParams calculates the scale on an asymmetric
// base [int_type::min, int_type::max], manually calculate the scale on a
// symmetric range [int_type::min+1, int_type::max] to ensure a null
// zero-point.
if (tensor.type == TensorType_INT16) {
CHECK_EQ(std::abs(tensor.min), tensor.max);
tensor.scale = tensor.max / std::numeric_limits<int16_t>::max();
tensor.zero_point = 0;
tensor.min = 0;
tensor.max = 0;
}

return tensor;
}

protected:
int input_;
int axis_;
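The SymmetricInt16Scaling helper added above turns a symmetric float range directly into int16 quantization parameters: scale = max / 32767 and zero_point = 0, with min/max then cleared (presumably so SingleOpModel keeps the explicit scale instead of re-deriving asymmetric parameters). A worked example of that arithmetic for the {-255.0, 255.0} range used in the tests below (hypothetical standalone snippet, not part of the test file):

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  const float max = 255.0f;  // range is [-255, 255], so |min| == max
  const float scale = max / std::numeric_limits<int16_t>::max();  // 255 / 32767
  const int zero_point = 0;  // symmetric by construction
  std::printf("scale=%f zero_point=%d\n", scale, zero_point);
  return 0;
}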
@@ -61,12 +79,12 @@ class BaseOpModel : public SingleOpModel {
// Model for the tests case where axis is a const tensor.
class MeanOpConstModel : public BaseOpModel {
public:
MeanOpConstModel(const TensorData& input, const TensorData& output,
MeanOpConstModel(TensorData input, TensorData output,
std::initializer_list<int> axis_shape,
std::initializer_list<int> axis, bool keep_dims) {
input_ = AddInput(input);
input_ = AddInput(SymmetricInt16Scaling(input));
axis_ = AddConstInput(TensorType_INT32, axis, axis_shape);
output_ = AddOutput(output);
output_ = AddOutput(SymmetricInt16Scaling(output));
SetBuiltinOp(BuiltinOperator_MEAN, BuiltinOptions_ReducerOptions,
CreateReducerOptions(builder_, keep_dims).Union());
BuildInterpreter({GetShape(input_)});

@@ -439,13 +457,10 @@ TEST(ConstUint8MeanOpTest, KeepDims) {

template <typename integer_type, TensorType tensor_dtype>
void MeanOpConstModelTest() {
float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);
float kQuantizedTolerance = GetTolerance<integer_type>(-255.0, 255.0);
std::vector<float> data = {105.0, 71.0, 233.0, 92.0, 227.0, 11.0, 14.0, 43.0};

float scale = tensor_dtype == TensorType_INT16 ? 255 / 32767.0f : 0.0f;

MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, 0.0, 255.0, scale, 0},
{tensor_dtype, {1, 2, 4}, 0.0, 255.0, scale, 0}, {1}, {1},
MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -255.0, 255.0},
{tensor_dtype, {1, 2, 4}, -255, 255.0}, {1}, {1},
false);
m.QuantizeAndPopulate<integer_type>(m.Input(), data);
m.Invoke();

@@ -468,11 +483,8 @@ template <typename integer_type, TensorType tensor_dtype>
void ConstMeanOpTestNonSameScale() {
float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);
std::vector<float> data = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};

float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f;

MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -1.0, 1.0, scale, 0},
{tensor_dtype, {1, 2}, -5.0, 5.0, scale, 0}, {2}, {1, 3},
MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -1.0, 1.0},
{tensor_dtype, {1, 2}, -5.0, 5.0}, {2}, {1, 3},
false);
m.QuantizeAndPopulate<integer_type>(m.Input(), data);
m.Invoke();

@@ -495,15 +507,12 @@ TEST_F(ConstMeanOpTestNonSameScale, NonSpecialAxisNonSameScaleInt16) {
template <typename integer_type, TensorType tensor_dtype>
void MeanOpTestQuantizedSameScale() {
float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);

float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f;

std::vector<float> data = {0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1,
0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.2, 0.9, 0.9,
0.9, 0.9, 0.2, 0.3, 0.7, 0.7, 0.1, 0.1, 0.3,
0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4};
MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0, scale, 0},
{tensor_dtype, {2}, -1.0, 1.0, scale, 0}, {2}, {1, 2},
MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0},
{tensor_dtype, {2}, -1.0, 1.0}, {2}, {1, 2},
true);
m.QuantizeAndPopulate<integer_type>(m.Input(), data);
m.Invoke();

@@ -527,15 +536,12 @@ TEST_F(MeanOpTestQuantizedSameScale, QuantizedSameScaleInt16) {
template <typename integer_type, TensorType tensor_dtype>
void MeanOpTestQuantizedDifferentScale() {
float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);

float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f;

std::vector<float> data = {0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1,
0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.2, 0.9, 0.9,
0.9, 0.9, 0.2, 0.3, 0.7, 0.7, 0.1, 0.1, 0.3,
0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4};
MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0, scale, 0},
{tensor_dtype, {2}, -4.0, 4.0, scale, 0}, {2}, {1, 2},
MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0},
{tensor_dtype, {2}, -4.0, 4.0}, {2}, {1, 2},
true);
m.QuantizeAndPopulate<integer_type>(m.Input(), data);
m.Invoke();