Merge pull request #43148 from Tessil:toupstream/ensure_all_16x8_operators_check_null_zero_point

PiperOrigin-RevId: 340171268
Change-Id: Ifcd3ea5b57b1c97f9a020270d472bd28751ceb07
TensorFlower Gardener 2020-11-01 22:28:45 -08:00
commit 0e48849777
12 changed files with 140 additions and 89 deletions
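
The kernel changes below all add the same Prepare-time rule: a 16-bit quantized tensor must be symmetrically quantized, so its zero point has to be 0; the test changes adjust quantization parameters so the int16 tensors used in the tests actually satisfy that. A rough sketch of the recurring guard (the wrapper function name and the include line are illustrative, not part of the change; the types and macros are the TFLite ones used in the diffs):

#include "tensorflow/lite/c/common.h"

// Illustrative helper showing the check each 16x8 kernel's Prepare() now
// performs inline. TF_LITE_ENSURE_EQ logs through the context and makes the
// enclosing function return kTfLiteError when the comparison fails.
TfLiteStatus EnsureNullZeroPointForInt16(TfLiteContext* context,
                                         const TfLiteTensor* input,
                                         const TfLiteTensor* output) {
  if (input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
  }
  if (output->type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }
  return kTfLiteOk;
}

The per-kernel diffs below inline this check directly in each Prepare() rather than calling a shared helper.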

@@ -364,6 +364,12 @@ TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
     QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
                        &data->output_shift_identity);
   }
+
+  if (input->type == kTfLiteInt16 && output->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  }
+
   return context->ResizeTensor(context, output,
                                TfLiteIntArrayCopy(input->dims));
 }
@@ -598,6 +604,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   if (input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
     TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
 
     data->params.exp_lut = data->exp_lut;
@@ -669,8 +676,7 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
   output->type = input->type;
 
-  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
-      output->type == kTfLiteInt16) {
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     // prelu(x) = x if x >= 0 else x * alpha.
     // So if we translate that for quantized computation:
     //

@@ -556,7 +556,9 @@ TEST(QuantizedActivationsOpTest, LeakyReluUint8) {
 template <TensorType tensor_type, typename integer_dtype>
 void QuantizedActivationsOpTestLeakyRelu() {
   const float kMin = -1;
-  const float kMax = 127.f / 128.f;
+  const float kMax =
+      std::numeric_limits<integer_dtype>::max() /
+      static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
 
   QuantizedActivationsOpModel m(
       /*input=*/{tensor_type, {5, 5}, 5 * kMin, 5 * kMax}, 0.1);
@@ -1307,9 +1309,12 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt8) {
 // Test quantized softmax with int16 input and output. With the same input as in
 // QuantizedActivationsOpTest.Softmax2D, the dequantized output is identical.
 TEST(QuantizedActivationsOpTest, Softmax1DInt16) {
-  QuantizedActivationsOpModel m(1,
-                                /*input=*/{TensorType_INT16, {3}, -3, 3},
-                                /*output_type-*/ TensorType_INT16);
+  const float kMin = -1;
+  const float kMax = 32767.f / 32768.f;
+  QuantizedActivationsOpModel m(
+      1,
+      /*input=*/{TensorType_INT16, {3}, 3 * kMin, 3 * kMax},
+      /*output_type-*/ TensorType_INT16);
   m.SetInput<int16_t>({1, 2, 3});
   m.Invoke();
   EXPECT_THAT(
@@ -1319,9 +1324,11 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt16) {
 }
 
 TEST(QuantizedActivationsOpTest, Softmax1DInt16ZeroElement) {
-  QuantizedActivationsOpModel m(0.1,
-                                /*input=*/{TensorType_INT16, {1}, -1, 1},
-                                TensorType_INT16);
+  const float kMin = -1;
+  const float kMax = 32767.f / 32768.f;
+  QuantizedActivationsOpModel m(
+      0.1,
+      /*input=*/{TensorType_INT16, {1}, 1 * kMin, 1 * kMax}, TensorType_INT16);
   m.SetInput<int16_t>({0});
   m.Invoke();
   EXPECT_THAT(m.GetDequantizedOutput<int16_t>(),
@@ -1329,9 +1336,12 @@ TEST(QuantizedActivationsOpTest, Softmax1DInt16ZeroElement) {
 }
 
 TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
-  QuantizedActivationsOpModel m(0.1,
-                                /*input=*/{TensorType_INT16, {2, 4}, -10, 10},
-                                TensorType_INT16);
+  const float kMin = -1;
+  const float kMax = 32767.f / 32768.f;
+  QuantizedActivationsOpModel m(
+      0.1,
+      /*input=*/{TensorType_INT16, {2, 4}, 10 * kMin, 10 * kMax},
+      TensorType_INT16);
   m.SetInput<int16_t>({
       0, -6, 2, 4,   //
       3, -2, 10, 1,  //
@@ -1346,9 +1356,10 @@ TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
                   kQuantizedToleranceInt16)));
 
   // Same input, but a different shape.
-  QuantizedActivationsOpModel m2(0.1,
-                                 /*input=*/{TensorType_INT16, {4, 2}, -10, 10},
-                                 TensorType_INT16);
+  QuantizedActivationsOpModel m2(
+      0.1,
+      /*input=*/{TensorType_INT16, {4, 2}, 10 * kMin, 10 * kMax},
+      TensorType_INT16);
   m2.SetInput<int16_t>({
       0, -6,  //
       2, 4,   //
@@ -1368,9 +1379,12 @@ TEST(QuantizedActivationsOpTest, Softmax2DInt16) {
 }
 
 TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
+  const float kMin = -1;
+  const float kMax = 32767.f / 32768.f;
   QuantizedActivationsOpModel m(
       1,
-      /*input=*/{TensorType_INT16, {1, 2, 4}, -10, 10}, TensorType_INT16);
+      /*input=*/{TensorType_INT16, {1, 2, 4}, 10 * kMin, 10 * kMax},
+      TensorType_INT16);
   m.SetInput<int16_t>({
       0, -6, 2, 4,   // depth = 0
       3, -2, 10, 1,  // depth = 1
@@ -1387,7 +1401,8 @@ TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
   // Same input, but a different shape.
   QuantizedActivationsOpModel m2(
       1,
-      /*input=*/{TensorType_INT16, {4, 1, 2}, -10, 10}, TensorType_INT16);
+      /*input=*/{TensorType_INT16, {4, 1, 2}, 10 * kMin, 10 * kMax},
+      TensorType_INT16);
   m2.SetInput<int16_t>({
       0, -6,  //
       2, 4,   //
@@ -1409,9 +1424,12 @@ TEST(QuantizedActivationsOpTest, Softmax3DInt16) {
 // Test quantized softmax with int16 input and output. With the same input as in
 // QuantizedActivationsOpTest.Softmax4D, the dequantized output is identical.
 TEST(QuantizedActivationsOpTest, Softmax4DInt16) {
+  const float kMin = -1;
+  const float kMax = 32767.f / 32768.f;
   QuantizedActivationsOpModel m(
       0.1,
-      /*input=*/{TensorType_INT16, {1, 2, 1, 4}, -10, 10}, TensorType_INT16);
+      /*input=*/{TensorType_INT16, {1, 2, 1, 4}, 10 * kMin, 10 * kMax},
+      TensorType_INT16);
   m.SetInput<int16_t>({
       0, -6, 2, 4,   // depth = 0
       3, -2, 10, 1,  // depth = 1
@@ -1428,7 +1446,8 @@ TEST(QuantizedActivationsOpTest, Softmax4DInt16) {
   // Same input, but a different shape.
   QuantizedActivationsOpModel m2(
      0.1,
-      /*input=*/{TensorType_INT16, {4, 1, 1, 2}, -10, 10}, TensorType_INT16);
+      /*input=*/{TensorType_INT16, {4, 1, 1, 2}, 10 * kMin, 10 * kMax},
+      TensorType_INT16);
   m2.SetInput<int16_t>({
       0, -6,  //
       2, 4,   //

@@ -100,6 +100,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     }
   }
 
+  if (input_type == kTfLiteInt16) {
+    // Make sure that all Int16 inputs have a null zero-point.
+    for (int i = 0; i < node->inputs->size; ++i) {
+      const TfLiteTensor* t = GetInput(context, node, i);
+      TF_LITE_ENSURE_EQ(context, t->params.zero_point, 0);
+    }
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  }
+
   return context->ResizeTensor(context, output, output_size);
 }

@@ -296,8 +296,13 @@ TYPED_TEST_CASE(ConcatenationOpTestTyped, TestTypes);
 TYPED_TEST(ConcatenationOpTestTyped, FourInputsQuantizedInt8) {
   using TestType = typename TestFixture::TestType;
 
+  const float kMin = -1;
+  const float kMax =
+      std::numeric_limits<TestType>::max() /
+      static_cast<float>(std::numeric_limits<TestType>::max() + 1);
+
   QuantizedConcatenationOpModel m0(
-      {TestFixture::tensor_type, {2, 1, 2}, -12.7, 12.8},
+      {TestFixture::tensor_type, {2, 1, 2}, 12.8f * kMin, 12.8f * kMax},
       /*axis=*/2,
       /*num_inputs=*/4);
@@ -311,20 +316,6 @@ TYPED_TEST(ConcatenationOpTestTyped, FourInputsQuantizedInt8) {
                   1, 3, 1.1, 3.1, 1.2, 3.2, 1.3, 3.3,  //
                   4, 7, 4.1, 7.1, 4.2, 7.2, 4.3, 7.3   //
               })));
-  if (TestFixture::tensor_type == TensorType_INT8) {
-    EXPECT_THAT(m0.GetOutput<int8_t>(), ElementsAreArray({
-                                            9, 29, 10, 30, 11, 31, 12, 32,   //
-                                            39, 69, 40, 70, 41, 71, 42, 72,  //
-                                        }));
-  }
-  if (TestFixture::tensor_type == TensorType_INT16) {
-    EXPECT_THAT(m0.GetOutput<int16_t>(),
-                ElementsAreArray({2441, 7581, 2698, 7838, 2955,    //
-                                  8095, 3212, 8352, 10151, 17861,  //
-                                  10408, 18118, 10665, 18375, 10922, 18632}));
-  }
 }
 
 TEST(ConcatenationOpTest, FourInputsQuantizedMixedRange) {

@@ -338,6 +338,11 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context,
                      input_type == kTfLiteInt8 || input_type == kTfLiteInt16);
   TF_LITE_ENSURE_TYPES_EQ(context, output->type, input_type);
 
+  if (input_type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  }
+
   const TfLiteTensor* bias = nullptr;
 
   // TODO(ahentz): At this point the optimized versions require 'bias'. We can
@@ -352,8 +357,6 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context,
     } else if (input_type == kTfLiteInt16) {
       TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
       TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
-      TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
-      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
     } else {
       TF_LITE_ENSURE_TYPES_EQ(context, bias->type, input_type);
     }

@@ -133,6 +133,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                    filter->type == data_type || data_type == kTfLiteInt16);
   }
 
+  if (data_type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  }
+
   // Filter in DepthwiseConv is expected to be [1, H, W, O].
   TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 0), 1);
@@ -144,8 +149,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   } else if (data_type == kTfLiteInt16) {
     TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
     TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
-    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
-    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
   } else {
     TF_LITE_ENSURE_TYPES_EQ(context, bias->type, data_type);
   }

@@ -60,6 +60,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                               op_context.input->type == kTfLiteInt16 ||
                               op_context.input->type == kTfLiteFloat16);
 
+  if (op_context.input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0);
+  }
+
   op_context.output->type = kTfLiteFloat32;
   // If the input tensor is constant, we can persist the dequantized value in
   // the output tensor. Otherwise we run dequantize upon each eval.

@@ -108,8 +108,8 @@ TEST(DequantizeOpTest, Float16) {
 }
 
 TEST(DequantizeOpTest, Int16) {
-  DequantizeOpModel m(TensorType_INT16, {2, 5}, 0.5, -1, 4);
-  m.SetInput<int16_t>({-130, -127, -126, -125, -124, 123, 124, 125, 126, 130});
+  DequantizeOpModel m(TensorType_INT16, {2, 5}, 0.5, 0, 4);
+  m.SetInput<int16_t>({-129, -126, -125, -124, -123, 124, 125, 126, 127, 131});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(),
               ElementsAreArray(ArrayFloatNear(
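
For reference, dequantization is (q - zero_point) * scale, so moving the zero point from -1 to 0 and shifting every raw input up by one leaves the expected float outputs untouched: (-130 - (-1)) * 0.5 = (-129 - 0) * 0.5 = -64.5 at the low end, and (130 - (-1)) * 0.5 = (131 - 0) * 0.5 = 65.5 at the high end.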

@@ -136,6 +136,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                        &data->output_shift);
   }
 
+  if (input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
+  }
+  if (output->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+  }
+
   return context->ResizeTensor(context, output,
                                TfLiteIntArrayCopy(input->dims));
 }

@@ -92,26 +92,27 @@ TEST(QuantizeOpTest, INT16) {
                                 12700, 12800}));
 }
 
-// rescale factor is around 2
+// Input scale 1.000000, output scale 0.500000, input zeropoint 0, output
+// zeropoint 0
 TEST(QuantizeOpTest, Int16Int16) {
-  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -16383, 16384},
-                    {TensorType_INT16, {1, 1, 2, 5}, 0, 16384});
+  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 1.0, 0},
+                    {TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0});
   m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
   m.Invoke();
   EXPECT_THAT(m.GetOutput<int16_t>(),
-              ElementsAreArray({-32764, -32760, -32756, -32752, -32748, -32744,
-                                -32740, -32736, -32732, -32728}));
+              ElementsAreArray({2, 4, 6, 8, 10, 12, 14, 16, 18, 20}));
 }
 
-// zero point is -1, scale is 0.5
+// Input scale 0.500000, output scale 0.500000, input zeropoint 0, output
+// zeropoint 0
 TEST(QuantizeOpTest, Int16Int16SameScale) {
-  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -16384, 16384},
-                    {TensorType_INT16, {1, 1, 2, 5}, -16384, 16384});
+  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0},
+                    {TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0});
   m.SetInputAndQuantize<int16_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 37767});
   m.Invoke();
   EXPECT_THAT(m.GetOutput<int16_t>(),
-              ElementsAreArray({-1, 1, 3, 5, 7, 9, 11, 13, 15, 32767}));
+              ElementsAreArray({0, 2, 4, 6, 8, 10, 12, 14, 16, 32767}));
 }
 
 // Input scale 0.500000, output scale 0.500000, input zeropoint -1, output
@@ -409,24 +410,24 @@ TEST(QuantizeOpTest, Uint8Int8SmallerScale) {
               ElementsAreArray({1, 3, 5, 7, 9, 11, 13, 15, 17, 19}));
 }
 
-// Input scale 0.500000, output scale 0.500000, input zeropoint -1, output
+// Input scale 0.500000, output scale 0.500000, input zeropoint 0, output
 // zeropoint -1
 TEST(QuantizeOpTest, Int16Int8SameScale) {
-  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -63.5, 64},
-                    {TensorType_INT8, {1, 1, 2, 5}, -63.5, 64});
+  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0},
+                    {TensorType_INT8, {1, 1, 2, 5}, 0, 0, 0.5, -1});
 
-  // Input will quantized to {1,3,5,7,9,11,13,15,17,19}.
+  // Input will quantized to {2,4,6,8,10,12,14,16,18,20}.
   m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
   m.Invoke();
   EXPECT_THAT(m.GetOutput<int8_t>(),
               ElementsAreArray({1, 3, 5, 7, 9, 11, 13, 15, 17, 19}));
 }
 
-// Input scale 0.500000, output scale 1.000000, input zeropoint -1, output
+// Input scale 0.500000, output scale 1.000000, input zeropoint 0, output
 // zeropoint -1
 TEST(QuantizeOpTest, Int16Int8LargerScale) {
-  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -63.5, 64},
-                    {TensorType_INT8, {1, 1, 2, 5}, -127, 128});
+  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 0.5, 0},
+                    {TensorType_INT8, {1, 1, 2, 5}, 0, 0, 1.0, -1});
 
   m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
   m.Invoke();
@@ -434,11 +435,11 @@ TEST(QuantizeOpTest, Int16Int8LargerScale) {
               ElementsAreArray({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}));
 }
 
-// Input scale 1.000000, output scale 0.500000, input zeropoint -1, output
+// Input scale 1.000000, output scale 0.500000, input zeropoint 0, output
 // zeropoint -1
 TEST(QuantizeOpTest, Int16Int8SmallerScale) {
-  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, -127, 128},
-                    {TensorType_INT8, {1, 1, 2, 5}, -63.5, 64});
+  QuantizeOpModel m({TensorType_INT16, {1, 1, 2, 5}, 0, 0, 1.0, 0},
+                    {TensorType_INT8, {1, 1, 2, 5}, 0, 0, 0.5, -1});
 
   m.SetInputAndQuantize<int16_t>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10});
   m.Invoke();
@@ -448,8 +449,8 @@ TEST(QuantizeOpTest, Int16Int8SmallerScale) {
 // Same as previous test, except more data to hit the neon path.
 TEST(QuantizeOpTest, Int16Int8SmallerScaleNeonPath) {
-  QuantizeOpModel m({TensorType_INT16, {1, 1, 4, 5}, -127, 128},
-                    {TensorType_INT8, {1, 1, 4, 5}, -63.5, 64});
+  QuantizeOpModel m({TensorType_INT16, {1, 1, 4, 5}, 0, 0, 1.0, 0},
+                    {TensorType_INT8, {1, 1, 4, 5}, 0, 0, 0.5, -1});
 
   m.SetInputAndQuantize<int16_t>(
       {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1});
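
The rewritten comments above state the requantization these tests exercise. A standalone sketch (plain C++, not TFLite code) that reproduces the new Int16Int16 expectation {2, 4, ..., 20} from inputs 1..10 with input scale 1.0, output scale 0.5 and null zero points:

#include <cmath>
#include <cstdio>

int main() {
  // real = (q_in - zp_in) * s_in;  q_out = round(real / s_out) + zp_out
  const float s_in = 1.0f, s_out = 0.5f;  // scales from QuantizeOpTest.Int16Int16
  const int zp_in = 0, zp_out = 0;        // null zero points, as now enforced
  for (int q_in = 1; q_in <= 10; ++q_in) {
    const float real = (q_in - zp_in) * s_in;
    const int q_out = static_cast<int>(std::lround(real / s_out)) + zp_out;
    std::printf("%d -> %d\n", q_in, q_out);  // 1 -> 2, 2 -> 4, ..., 10 -> 20
  }
  return 0;
}

The Int16Int8 cases follow the same formula with an output zero point of -1, which is why their expected values stay at {1, 3, 5, ..., 19}.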

@@ -263,6 +263,12 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
     QuantizeMultiplier(real_multiplier, &data->multiplier, &exponent);
     data->shift = exponent;
   }
+
+  if (op_context.input->type == kTfLiteInt16) {
+    TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0);
+    TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0);
+  }
+
   TfLiteTensor* temp_sum;
   TF_LITE_ENSURE_OK(context,
                     GetTemporarySafe(context, node, /*index=*/2, &temp_sum));

@@ -52,6 +52,24 @@ class BaseOpModel : public SingleOpModel {
   int Input() { return input_; }
 
+ protected:
+  TensorData& SymmetricInt16Scaling(TensorData& tensor) {
+    // Symmetric range and null zero-point is required for INT16 tensors. As
+    // SingleOpModel::QuantizationParams calculates the scale on an asymmetric
+    // base [int_type::min, int_type::max], manually calculate the scale on a
+    // symmetric range [int_type::min+1, int_type::max] to ensure a null
+    // zero-point.
+    if (tensor.type == TensorType_INT16) {
+      CHECK_EQ(std::abs(tensor.min), tensor.max);
+      tensor.scale = tensor.max / std::numeric_limits<int16_t>::max();
+      tensor.zero_point = 0;
+      tensor.min = 0;
+      tensor.max = 0;
+    }
+
+    return tensor;
+  }
+
  protected:
   int input_;
   int axis_;
@@ -61,12 +79,12 @@ class BaseOpModel : public SingleOpModel {
 // Model for the tests case where axis is a const tensor.
 class MeanOpConstModel : public BaseOpModel {
  public:
-  MeanOpConstModel(const TensorData& input, const TensorData& output,
+  MeanOpConstModel(TensorData input, TensorData output,
                    std::initializer_list<int> axis_shape,
                    std::initializer_list<int> axis, bool keep_dims) {
-    input_ = AddInput(input);
+    input_ = AddInput(SymmetricInt16Scaling(input));
     axis_ = AddConstInput(TensorType_INT32, axis, axis_shape);
-    output_ = AddOutput(output);
+    output_ = AddOutput(SymmetricInt16Scaling(output));
     SetBuiltinOp(BuiltinOperator_MEAN, BuiltinOptions_ReducerOptions,
                  CreateReducerOptions(builder_, keep_dims).Union());
     BuildInterpreter({GetShape(input_)});
@@ -450,14 +468,10 @@ TEST(ConstUint8MeanOpTest, KeepDims) {
 template <typename integer_type, TensorType tensor_dtype>
 void MeanOpConstModelTest() {
-  float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);
+  float kQuantizedTolerance = GetTolerance<integer_type>(-255.0, 255.0);
   std::vector<float> data = {105.0, 71.0, 233.0, 92.0, 227.0, 11.0, 14.0, 43.0};
-
-  float scale = tensor_dtype == TensorType_INT16 ? 255 / 32767.0f : 0.0f;
-  MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, 0.0, 255.0, scale, 0},
-                     {tensor_dtype, {1, 2, 4}, 0.0, 255.0, scale, 0}, {1}, {1},
-                     false);
+  MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -255.0, 255.0},
+                     {tensor_dtype, {1, 2, 4}, -255, 255.0}, {1}, {1}, false);
   m.QuantizeAndPopulate<integer_type>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2, 4}));
@@ -479,12 +493,8 @@ template <typename integer_type, TensorType tensor_dtype>
 void ConstMeanOpTestNonSameScale() {
   float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);
   std::vector<float> data = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
-
-  float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f;
-  MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -1.0, 1.0, scale, 0},
-                     {tensor_dtype, {1, 2}, -5.0, 5.0, scale, 0}, {2}, {1, 3},
-                     false);
+  MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -1.0, 1.0},
+                     {tensor_dtype, {1, 2}, -5.0, 5.0}, {2}, {1, 3}, false);
   m.QuantizeAndPopulate<integer_type>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
@@ -506,16 +516,12 @@ TEST_F(ConstMeanOpTestNonSameScale, NonSpecialAxisNonSameScaleInt16) {
 template <typename integer_type, TensorType tensor_dtype>
 void MeanOpTestQuantizedSameScale() {
   float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);
-  float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f;
-
   std::vector<float> data = {0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1,
                              0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.2, 0.9, 0.9,
                              0.9, 0.9, 0.2, 0.3, 0.7, 0.7, 0.1, 0.1, 0.3,
                              0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4};
-  MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0, scale, 0},
-                     {tensor_dtype, {2}, -1.0, 1.0, scale, 0}, {2}, {1, 2},
-                     true);
+  MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0},
+                     {tensor_dtype, {2}, -1.0, 1.0}, {2}, {1, 2}, true);
   m.QuantizeAndPopulate<integer_type>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 9}));
@@ -538,16 +544,12 @@ TEST_F(MeanOpTestQuantizedSameScale, QuantizedSameScaleInt16) {
 template <typename integer_type, TensorType tensor_dtype>
 void MeanOpTestQuantizedDifferentScale() {
   float kQuantizedTolerance = GetTolerance<integer_type>(-5.0, 5.0);
-  float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f;
-
   std::vector<float> data = {0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1,
                              0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.2, 0.9, 0.9,
                              0.9, 0.9, 0.2, 0.3, 0.7, 0.7, 0.1, 0.1, 0.3,
                              0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4};
-  MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0, scale, 0},
-                     {tensor_dtype, {2}, -4.0, 4.0, scale, 0}, {2}, {1, 2},
-                     true);
+  MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0},
+                     {tensor_dtype, {2}, -4.0, 4.0}, {2}, {1, 2}, true);
   m.QuantizeAndPopulate<integer_type>(m.Input(), data);
   m.Invoke();
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 9}));
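
Two routes to a null zero-point appear in these tests: the SymmetricInt16Scaling helper above picks scale = max / 32767 and forces zero_point = 0, while the activation and quantize tests keep the default derivation and choose bounds of the form S * [-1, 32767/32768] so that the asymmetric formula lands on a zero zero-point anyway. A standalone arithmetic check of both (plain C++, illustrative only; the scale = (max - min) / (qmax - qmin), zero_point = qmin - min / scale formula is assumed from the helper's comment, not copied from SingleOpModel):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

int main() {
  // Route 1 (SymmetricInt16Scaling): scale taken directly from the symmetric max.
  const float scale1 = 1.0f / std::numeric_limits<int16_t>::max();  // ~3.05e-5
  assert(scale1 > 0.0f);  // zero_point is simply forced to 0

  // Route 2 (e.g. the Softmax*Int16 tests): min = 10 * -1, max = 10 * 32767/32768.
  const float tmin = 10.0f * -1.0f;
  const float tmax = 10.0f * 32767.0f / 32768.0f;
  const float scale2 = (tmax - tmin) / (32767.0f - (-32768.0f));  // 10 / 32768
  const long zero_point = std::lround(-32768.0f - tmin / scale2);
  assert(zero_point == 0);
  return 0;
}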