Merge pull request from Tessil:toupstream/16x8_reduce_min_reduce_max

PiperOrigin-RevId: 341600565
Change-Id: Ia0dedf522526d220863e6a8e37fc9df68827d52e
This commit is contained in:
TensorFlower Gardener 2020-11-10 05:44:46 -08:00
commit 641032d2c5
8 changed files with 273 additions and 221 deletions

View File

@ -45,6 +45,9 @@
* Removed deprecated `Interpreter::UseNNAPI(bool)` C++ API.
* Use `NnApiDelegate()` and related delegate configuration methods
directly.
* 16-bit quantization
* Added int16x8 support for REDUCE_MIN and REDUCE_MAX operators.
* TF Core:
* Corrected higher-order gradients of control flow constructs (`tf.cond`,
`tf.while_loop`, and compositions like `tf.foldl`) computed with

View File

@ -223,6 +223,11 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_TYPES_EQ(context, op_context.axis->type, kTfLiteInt32);
TF_LITE_ENSURE_OK(context, InitializeTemporaries(context, node, &op_context));
if (op_context.input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, op_context.input->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, 0);
}
TfLiteTensor* resolved_axis;
TF_LITE_ENSURE_OK(
context, GetTemporarySafe(context, node, /*index=*/1, &resolved_axis));
@ -541,7 +546,8 @@ TfLiteStatus EvalLogic(TfLiteContext* context, TfLiteNode* node,
if (input->dims->data[i] == 0) return kTfLiteOk;
}
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8 ||
input->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, input->params.scale,
op_context->output->params.scale);
TF_LITE_ENSURE_EQ(context, input->params.zero_point,
@ -641,6 +647,9 @@ TfLiteStatus EvalGeneric(TfLiteContext* context, TfLiteNode* node) {
case kTfLiteInt8:
return EvalType<int8_t>(context, node, &op_context, reduce_type);
break;
case kTfLiteInt16:
return EvalType<int16_t>(context, node, &op_context, reduce_type);
break;
case kTfLiteBool:
return EvalType<bool>(context, node, &op_context, reduce_type);
break;

View File

@ -1000,148 +1000,163 @@ TEST(DynamicFloatMaxOpTest, Scale) {
EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray(ArrayFloatNear({9.527})));
}
TEST(ConstUint8MaxOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
template <TensorType tensor_type, typename integer_dtype>
void ConstMaxOpTestNotKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MaxOpConstModel m({TensorType_UINT8, {1, 3, 2}, -1.0, 1.0},
{TensorType_UINT8, {2}, -1.0, 1.0}, {1}, {1}, false);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
MaxOpConstModel m({tensor_type, {1, 3, 2}, 1.0f * kMin, 1.0f * kMax},
{tensor_type, {2}, 1.0f * kMin, 1.0f * kMax}, {1}, {1},
false);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(
ArrayFloatNear({0.501961, 0.603922}, kQuantizedTolerance)));
EXPECT_THAT(
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({0.5, 0.6}, kQuantizedTolerance)));
}
// uint8 instantiation of the shared reduce-max "drop the reduced axis" test.
TEST(ConstUint8MaxOpTest, NotKeepDims) {
ConstMaxOpTestNotKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(ConstInt8MaxOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
ConstMaxOpTestNotKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the shared reduce-max "drop the reduced axis" test
// (covers the newly added int16x8 REDUCE_MAX support).
TEST(ConstInt16MaxOpTest, NotKeepDims) {
ConstMaxOpTestNotKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void ConstMaxOpTestKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MaxOpConstModel m({TensorType_INT8, {1, 3, 2}, -1.0, 1.0},
{TensorType_INT8, {2}, -1.0, 1.0}, {1}, {1}, false);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
MaxOpConstModel m({tensor_type, {3, 2}, 1.0f * kMin, 1.0f * kMax},
{tensor_type, {3}, 1.0f * kMin, 1.0f * kMax}, {1}, {1},
true);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(
ArrayFloatNear({0.501961, 0.603922}, kQuantizedTolerance)));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
EXPECT_THAT(
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({0.4, 0.4, 0.6}, kQuantizedTolerance)));
}
TEST(ConstUint8MaxOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MaxOpConstModel m({TensorType_UINT8, {3, 2}, -1.0, 1.0},
{TensorType_UINT8, {3}, -1.0, 1.0}, {1}, {1}, true);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(
ArrayFloatNear({0.4, 0.4, 0.603922}, kQuantizedTolerance)));
ConstMaxOpTestKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(ConstInt8MaxOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MaxOpConstModel m({TensorType_INT8, {3, 2}, -1.0, 1.0},
{TensorType_INT8, {3}, -1.0, 1.0}, {1}, {1}, true);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
ConstMaxOpTestKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the shared reduce-max "keep a size-1 reduced axis"
// test.
TEST(ConstInt16MaxOpTest, KeepDims) {
ConstMaxOpTestKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void DynamicMaxOpTestNotKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-5.0, 5.0);
std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
MaxOpDynamicModel m({tensor_type, {2, 2}, 5.0f * kMin, 5.0f * kMax},
{tensor_type, {2}, 5.0f * kMin, 5.0f * kMax},
{TensorType_INT32, {1}}, false);
std::vector<int> axis = {1};
m.SetAxis(axis);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(
ArrayFloatNear({0.4, 0.4, 0.603922}, kQuantizedTolerance)));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
EXPECT_THAT(
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({1.3, 0.24}, kQuantizedTolerance)));
}
TEST(DynamicUint8MaxOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-5.0, 2.0);
std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
MaxOpDynamicModel m({TensorType_UINT8, {2, 2}, -5.0, 2.0},
{TensorType_UINT8, {2}, -5.0, 2.0},
{TensorType_INT32, {1}}, false);
std::vector<int> axis = {1};
m.SetAxis(axis);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(
ArrayFloatNear({1.2902, 0.247059}, kQuantizedTolerance)));
DynamicMaxOpTestNotKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(DynamicInt8MaxOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-5.0, 2.0);
std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
MaxOpDynamicModel m({TensorType_INT8, {2, 2}, -5.0, 2.0},
{TensorType_INT8, {2}, -5.0, 2.0},
{TensorType_INT32, {1}}, false);
std::vector<int> axis = {1};
DynamicMaxOpTestNotKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the reduce-max test with a dynamically supplied axis
// and keep_dims=false.
TEST(DynamicInt16MaxOpTest, NotKeepDims) {
DynamicMaxOpTestNotKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void DynamicMaxOpTestKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-12.0, 12.0);
std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
MaxOpDynamicModel m({tensor_type, {2, 2}, 12.0f * kMin, 12.0f * kMax},
{tensor_type, {2}, 12.0f * kMin, 12.0f * kMax},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.SetAxis(axis);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(
ArrayFloatNear({1.2902, 0.247059}, kQuantizedTolerance)));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({11.14, 0.879}, kQuantizedTolerance)));
}
TEST(DynamicUint8MaxOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
MaxOpDynamicModel m({TensorType_UINT8, {2, 2}, -10.0, 12.0},
{TensorType_UINT8, {2}, -10.0, 12.0},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.SetAxis(axis);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(
ArrayFloatNear({11.1294, 0.862745}, kQuantizedTolerance)));
DynamicMaxOpTestKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(DynamicInt8MaxOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
MaxOpDynamicModel m({TensorType_INT8, {2, 2}, -10.0, 12.0},
{TensorType_INT8, {2}, -10.0, 12.0},
DynamicMaxOpTestKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the reduce-max test with a dynamically supplied axis
// and keep_dims=true.
TEST(DynamicInt16MaxOpTest, KeepDims) {
DynamicMaxOpTestKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void DynamicMaxOpTestScalar() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-12.0, 12.0);
std::vector<float> data = {11.14};
MaxOpDynamicModel m({tensor_type, {}, 12.0f * kMin, 12.0f * kMax},
{tensor_type, {}, 12.0f * kMin, 12.0f * kMax},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.SetAxis(axis);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(
ArrayFloatNear({11.1294, 0.862745}, kQuantizedTolerance)));
EXPECT_THAT(m.GetOutputShape(), IsEmpty());
EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({11.14}, kQuantizedTolerance)));
}
TEST(DynamicUint8MaxOpTest, Scalar) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14};
MaxOpDynamicModel m({TensorType_UINT8, {}, -10.0, 12.0},
{TensorType_UINT8, {}, -10.0, 12.0},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), IsEmpty());
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
DynamicMaxOpTestScalar<TensorType_UINT8, uint8_t>();
}
TEST(DynamicInt8MaxOpTest, Scalar) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14};
MaxOpDynamicModel m({TensorType_INT8, {}, -10.0, 12.0},
{TensorType_INT8, {}, -10.0, 12.0},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), IsEmpty());
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
DynamicMaxOpTestScalar<TensorType_INT8, int8_t>();
}
// int16 instantiation of the reduce-max test on a scalar (rank-0) input.
TEST(DynamicInt16MaxOpTest, Scalar) {
DynamicMaxOpTestScalar<TensorType_INT16, int16_t>();
}
// Tests for reduce_min
@ -1223,148 +1238,163 @@ TEST(DynamicFloatMinOpTest, Scalar) {
EXPECT_THAT(m.GetOutput<float>(), ElementsAreArray(ArrayFloatNear({9.527})));
}
TEST(ConstUint8MinOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
template <TensorType tensor_type, typename integer_dtype>
void ConstMinOpTestNotKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MinOpConstModel m({TensorType_UINT8, {1, 3, 2}, -1.0, 1.0},
{TensorType_UINT8, {2}, -1.0, 1.0}, {1}, {1}, false);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
MinOpConstModel m({tensor_type, {1, 3, 2}, 1.0f * kMin, 1.0f * kMax},
{tensor_type, {2}, 1.0f * kMin, 1.0f * kMax}, {1}, {1},
false);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(
m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear({0.294117, 0.2}, kQuantizedTolerance)));
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({0.3, 0.2}, kQuantizedTolerance)));
}
// uint8 instantiation of the shared reduce-min "drop the reduced axis" test.
TEST(ConstUint8MinOpTest, NotKeepDims) {
ConstMinOpTestNotKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(ConstInt8MinOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
ConstMinOpTestNotKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the shared reduce-min "drop the reduced axis" test
// (covers the newly added int16x8 REDUCE_MIN support).
TEST(ConstInt16MinOpTest, NotKeepDims) {
ConstMinOpTestNotKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void ConstMinOpTestKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MinOpConstModel m({TensorType_INT8, {1, 3, 2}, -1.0, 1.0},
{TensorType_INT8, {2}, -1.0, 1.0}, {1}, {1}, false);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
MinOpConstModel m({tensor_type, {3, 2}, 1.0f * kMin, 1.0f * kMax},
{tensor_type, {3}, 1.0f * kMin, 1.0f * kMax}, {1}, {1},
true);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
EXPECT_THAT(
m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(ArrayFloatNear({0.294117, 0.2}, kQuantizedTolerance)));
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance)));
}
TEST(ConstUint8MinOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MinOpConstModel m({TensorType_UINT8, {3, 2}, -1.0, 1.0},
{TensorType_UINT8, {3}, -1.0, 1.0}, {1}, {1}, true);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
EXPECT_THAT(
m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance)));
ConstMinOpTestKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(ConstInt8MinOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
std::vector<float> data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
MinOpConstModel m({TensorType_INT8, {3, 2}, -1.0, 1.0},
{TensorType_INT8, {3}, -1.0, 1.0}, {1}, {1}, true);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
ConstMinOpTestKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the shared reduce-min "keep a size-1 reduced axis"
// test.
TEST(ConstInt16MinOpTest, KeepDims) {
ConstMinOpTestKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void DynamicMinOpTestNotKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-5.0, 5.0);
std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
MinOpDynamicModel m({tensor_type, {2, 2}, 5.0f * kMin, 5.0f * kMax},
{tensor_type, {2}, 5.0f * kMin, 5.0f * kMax},
{TensorType_INT32, {1}}, false);
std::vector<int> axis = {1};
m.SetAxis(axis);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1}));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
EXPECT_THAT(
m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.5}, kQuantizedTolerance)));
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({-4.8, -3.6}, kQuantizedTolerance)));
}
TEST(DynamicUint8MinOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-5.0, 2.0);
std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
MinOpDynamicModel m({TensorType_UINT8, {2, 2}, -5.0, 2.0},
{TensorType_UINT8, {2}, -5.0, 2.0},
{TensorType_INT32, {1}}, false);
std::vector<int> axis = {1};
m.SetAxis(axis);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
EXPECT_THAT(
m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear({-4.807843, -3.6}, kQuantizedTolerance)));
DynamicMinOpTestNotKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(DynamicInt8MinOpTest, NotKeepDims) {
float kQuantizedTolerance = GetTolerance(-5.0, 2.0);
std::vector<float> data = {1.3, -4.8, -3.6, 0.24};
MinOpDynamicModel m({TensorType_INT8, {2, 2}, -5.0, 2.0},
{TensorType_INT8, {2}, -5.0, 2.0},
{TensorType_INT32, {1}}, false);
std::vector<int> axis = {1};
DynamicMinOpTestNotKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the reduce-min test with a dynamically supplied axis
// and keep_dims=false.
TEST(DynamicInt16MinOpTest, NotKeepDims) {
DynamicMinOpTestNotKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void DynamicMinOpTestKeepDims() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-12.0, 12.0);
std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
MinOpDynamicModel m({tensor_type, {2, 2}, 12.0f * kMin, 12.0f * kMax},
{tensor_type, {2}, 12.0f * kMin, 12.0f * kMax},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.SetAxis(axis);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2}));
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(
m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(ArrayFloatNear({-4.807843, -3.6}, kQuantizedTolerance)));
m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({7.423, -0.14}, kQuantizedTolerance)));
}
TEST(DynamicUint8MinOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
MinOpDynamicModel m({TensorType_UINT8, {2, 2}, -10.0, 12.0},
{TensorType_UINT8, {2}, -10.0, 12.0},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.SetAxis(axis);
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(
ArrayFloatNear({7.427451, -0.164706}, kQuantizedTolerance)));
DynamicMinOpTestKeepDims<TensorType_UINT8, uint8_t>();
}
TEST(DynamicInt8MinOpTest, KeepDims) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14, -0.14, 7.423, 0.879};
MinOpDynamicModel m({TensorType_INT8, {2, 2}, -10.0, 12.0},
{TensorType_INT8, {2}, -10.0, 12.0},
DynamicMinOpTestKeepDims<TensorType_INT8, int8_t>();
}
// int16 instantiation of the reduce-min test with a dynamically supplied axis
// and keep_dims=true.
TEST(DynamicInt16MinOpTest, KeepDims) {
DynamicMinOpTestKeepDims<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype>
void DynamicMinOpTestScalar() {
const float kMin = -1;
const float kMax =
std::numeric_limits<integer_dtype>::max() /
static_cast<float>(std::numeric_limits<integer_dtype>::max() + 1);
const float kQuantizedTolerance = GetTolerance<integer_dtype>(-12.0, 12.0);
std::vector<float> data = {11.14};
MinOpDynamicModel m({tensor_type, {}, 12.0f * kMin, 12.0f * kMax},
{tensor_type, {}, 12.0f * kMin, 12.0f * kMax},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.SetAxis(axis);
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
m.QuantizeAndPopulate<integer_dtype>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2}));
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(
ArrayFloatNear({7.427451, -0.164706}, kQuantizedTolerance)));
EXPECT_THAT(m.GetOutputShape(), IsEmpty());
EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
ElementsAreArray(ArrayFloatNear({11.14}, kQuantizedTolerance)));
}
TEST(DynamicUint8MinOpTest, Scalar) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14};
MinOpDynamicModel m({TensorType_UINT8, {}, -10.0, 12.0},
{TensorType_UINT8, {}, -10.0, 12.0},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.QuantizeAndPopulate<uint8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), IsEmpty());
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
DynamicMinOpTestScalar<TensorType_UINT8, uint8_t>();
}
TEST(DynamicInt8MinOpTest, Scalar) {
float kQuantizedTolerance = GetTolerance(-10.0, 12.0);
std::vector<float> data = {11.14};
MinOpDynamicModel m({TensorType_INT8, {}, -10.0, 12.0},
{TensorType_INT8, {}, -10.0, 12.0},
{TensorType_INT32, {1}}, true);
std::vector<int> axis = {0};
m.QuantizeAndPopulate<int8_t>(m.Input(), data);
m.Invoke();
EXPECT_THAT(m.GetOutputShape(), IsEmpty());
EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
ElementsAreArray(ArrayFloatNear({11.1294}, kQuantizedTolerance)));
DynamicMinOpTestScalar<TensorType_INT8, int8_t>();
}
// int16 instantiation of the reduce-min test on a scalar (rank-0) input.
TEST(DynamicInt16MinOpTest, Scalar) {
DynamicMinOpTestScalar<TensorType_INT16, int16_t>();
}
// Tests for reduce_any

View File

@ -220,10 +220,10 @@ BuiltinOpResolver::BuiltinOpResolver() {
AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD());
AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(),
/* min_version = */ 1,
/* max_version = */ 2);
/* max_version = */ 3);
AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(),
/* min_version = */ 1,
/* max_version = */ 2);
/* max_version = */ 3);
AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY());
AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS());
AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE(),

View File

@ -101,11 +101,11 @@ TfLiteRegistration* Register_LESS_EQUAL();
TfLiteRegistration* Register_FLOOR_REF();
TfLiteRegistration* Register_TILE();
TfLiteRegistration* Register_NEG();
TfLiteRegistration* Register_SUM();
TfLiteRegistration* Register_REDUCE_PROD();
TfLiteRegistration* Register_REDUCE_MAX();
TfLiteRegistration* Register_REDUCE_MIN();
TfLiteRegistration* Register_REDUCE_ANY();
TfLiteRegistration* Register_SUM_REF();
TfLiteRegistration* Register_REDUCE_PROD_REF();
TfLiteRegistration* Register_REDUCE_MAX_REF();
TfLiteRegistration* Register_REDUCE_MIN_REF();
TfLiteRegistration* Register_REDUCE_ANY_REF();
TfLiteRegistration* Register_SELECT();
TfLiteRegistration* Register_SLICE_REF();
TfLiteRegistration* Register_SIN();
@ -375,17 +375,17 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() {
AddBuiltin(BuiltinOperator_TILE, Register_TILE(),
/* min_version = */ 1,
/* max_version = */ 2);
AddBuiltin(BuiltinOperator_SUM, Register_SUM(),
AddBuiltin(BuiltinOperator_SUM, Register_SUM_REF(),
/* min_version = */ 1,
/* max_version = */ 2);
AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD());
AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX(),
AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD_REF());
AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX_REF(),
/* min_version = */ 1,
/* max_version = */ 2);
AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN(),
/* max_version = */ 3);
AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN_REF(),
/* min_version = */ 1,
/* max_version = */ 2);
AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY());
/* max_version = */ 3);
AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY_REF());
AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS());
AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE(),
/* min_version = */ 1,

View File

@ -563,6 +563,8 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
case BuiltinOperator_MEAN:
case BuiltinOperator_PAD:
case BuiltinOperator_PADV2:
case BuiltinOperator_REDUCE_MAX:
case BuiltinOperator_REDUCE_MIN:
case BuiltinOperator_RELU6:
// In case of int16 inputs, the version is 3.
if (op_sig.input_types.at(0) == TensorType_INT16) {
@ -590,8 +592,6 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
case BuiltinOperator_SPACE_TO_DEPTH:
case BuiltinOperator_SPLIT_V:
case BuiltinOperator_SUM:
case BuiltinOperator_REDUCE_MAX:
case BuiltinOperator_REDUCE_MIN:
case BuiltinOperator_LOG_SOFTMAX:
case BuiltinOperator_TOPK_V2:
case BuiltinOperator_ARG_MAX:

View File

@ -312,6 +312,14 @@ TEST(OpVersionTest, VersioningSumTest) {
SimpleVersioningTest(BuiltinOperator_SUM);
}
// REDUCE_MIN now has an int16 path, so use the extended versioning check,
// which also exercises the int16-input -> version 3 case handled in
// GetBuiltinOperatorVersion.
TEST(OpVersionTest, VersioningReduceMinTest) {
SimpleVersioningTestExtended(BuiltinOperator_REDUCE_MIN);
}
// REDUCE_MAX now has an int16 path, so use the extended versioning check,
// which also exercises the int16-input -> version 3 case handled in
// GetBuiltinOperatorVersion.
TEST(OpVersionTest, VersioningReduceMaxTest) {
SimpleVersioningTestExtended(BuiltinOperator_REDUCE_MAX);
}
// ADD keeps the standard (non-extended) versioning check.
TEST(OpVersionTest, VersioningAddTest) {
SimpleVersioningTest(BuiltinOperator_ADD);
}

View File

@ -186,8 +186,10 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
{{BuiltinOperator_SUM, 2}, "1.15.0"},
{{BuiltinOperator_REDUCE_MAX, 1}, "1.11.0"},
{{BuiltinOperator_REDUCE_MAX, 2}, "1.14.0"},
{{BuiltinOperator_REDUCE_MAX, 3}, kPendingReleaseVersion},
{{BuiltinOperator_REDUCE_MIN, 1}, "1.11.0"},
{{BuiltinOperator_REDUCE_MIN, 2}, "1.14.0"},
{{BuiltinOperator_REDUCE_MIN, 3}, kPendingReleaseVersion},
{{BuiltinOperator_REDUCE_PROD, 1}, "1.11.0"},
{{BuiltinOperator_REDUCE_ANY, 1}, "1.11.0"},
{{BuiltinOperator_RELU6, 1}, "1.5.0"},