Merge pull request #45618 from advaitjain:xtensa-quantize-refactor

PiperOrigin-RevId: 347469025
Change-Id: I210f2a803c382e33d04c37abd439a776e092e687
TensorFlower Gardener 2020-12-14 14:30:19 -08:00
commit fe6d0cf3f9
7 changed files with 227 additions and 145 deletions

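Summary of the refactor (all taken from the hunks below): the portable quantize kernel moves out of quantize.cc into a new quantize_common.cc, its op data struct is renamed to OpDataQuantizeReference, and its eval entry point is exported as EvalQuantizeReference through a new quantize.h. The Xtensa port keeps its HIFIMINI-optimized path behind #if defined(HIFIMINI) and falls back to the shared reference kernel on all other targets; the tests narrow their guards from #if !defined(XTENSA) to #if !defined(HIFIMINI) to match.
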
tensorflow/lite/micro/kernels/BUILD

@@ -117,6 +117,7 @@ cc_library(
"pad.cc",
"pooling.cc",
"prelu.cc",
"quantize_common.cc",
"reduce.cc",
"reshape.cc",
"resize_nearest_neighbor.cc",
@@ -147,6 +148,7 @@ cc_library(
}),
hdrs = [
"micro_ops.h",
"quantize.h",
"svdf.h",
],
copts = micro_copts(),

tensorflow/lite/micro/kernels/quantize.cc

@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -25,24 +25,15 @@ limitations under the License.
namespace tflite {
namespace {
struct OpData {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t input_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -77,8 +68,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &data->output_multiplier,
&data->output_shift);
QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
&data->requantize_output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
@@ -88,107 +79,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_QUANTIZE() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*invoke=*/EvalQuantizeReference,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,

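Register_QUANTIZE() is the hook the op resolver uses to wire this kernel into a graph. A hedged usage sketch, assuming the MicroMutableOpResolver API roughly as it existed at the time (the capacity argument and helper name here are illustrative only):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch only: register the quantize kernel with a micro op resolver.
// AddQuantize() routes to Register_QUANTIZE() internally.
void RegisterOps(tflite::MicroMutableOpResolver<1>& resolver) {
  resolver.AddQuantize();  // <1> reserves capacity for a single op
}
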
tensorflow/lite/micro/kernels/quantize.h

@@ -0,0 +1,37 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
struct OpDataQuantizeReference {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t requantize_output_multiplier;
int requantize_output_shift;
int32_t input_zero_point;
};
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_

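The "fixed point multiplier plus a left shift" comment above is the standard TFLite device for applying a real-valued rescale with integer-only arithmetic: QuantizeMultiplier() splits the double into a Q31 mantissa and a power-of-two exponent. A self-contained sketch of that decomposition, assuming round-to-nearest and ignoring the zero-multiplier edge case the real helper also handles:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch only: express real_multiplier as quantized * 2^(shift - 31),
// with quantized a Q31 value in [2^30, 2^31).
void DecomposeMultiplier(double real_multiplier, int32_t* quantized,
                         int* shift) {
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::lround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding carried q up to exactly 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q_fixed);
}

int main() {
  int32_t multiplier;
  int shift;
  // Effective scale 0.25, e.g. requantizing from scale 0.5 to scale 2.0.
  DecomposeMultiplier(0.25, &multiplier, &shift);
  // Expect multiplier == 1 << 30 (0.5 in Q31) and shift == -1,
  // since 0.25 == 0.5 * 2^-1.
  std::printf("multiplier=%ld shift=%d\n", static_cast<long>(multiplier),
              shift);
  return 0;
}
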
tensorflow/lite/micro/kernels/quantize_common.cc

@@ -0,0 +1,122 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace tflite

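Each Requantize() call above maps values from one (scale, zero_point) pair to another. A hedged, float-based sketch of the per-element arithmetic; the real kernel performs the same dataflow in saturating fixed-point math using the requantize_output_multiplier/shift pair computed in Prepare():

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch only: requantize one int16 value to int8. Subtract the input
// zero point, rescale by input_scale / output_scale, add the output
// zero point, then clamp to the int8 range.
int8_t RequantizeOne(int16_t in, double effective_scale,
                     int32_t input_zero_point, int32_t output_zero_point) {
  const double scaled = (in - input_zero_point) * effective_scale;
  const int32_t out =
      static_cast<int32_t>(std::lround(scaled)) + output_zero_point;
  return static_cast<int8_t>(
      std::max<int32_t>(-128, std::min<int32_t>(127, out)));
}
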
tensorflow/lite/micro/kernels/quantize_test.cc

@@ -49,7 +49,7 @@ void ValidateQuantizeGoldens(TfLiteTensor* tensors, int tensors_size,
}
}
#if !defined(XTENSA)
#if !defined(HIFIMINI)
template <typename T>
void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
const int* output_dims_data, const float* golden,
@@ -79,7 +79,7 @@ void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
ValidateQuantizeGoldens(tensors, tensors_size, golden, golden_quantized,
scale, zero_point, output_dims_count, output_data);
}
#endif
#endif // defined(HIFIMINI)
template <typename InputType, typename OutputType>
void TestRequantize(const int* input_dims_data, const float* input_data,
@@ -121,7 +121,7 @@ void TestRequantize(const int* input_dims_data, const float* input_data,
TF_LITE_MICRO_TESTS_BEGIN
#if !defined(XTENSA)
#if !defined(HIFIMINI)
TF_LITE_MICRO_TEST(QuantizeOpTestUint8) {
const int length = 10;
const int dims[] = {2, 2, 5};
@@ -267,9 +267,9 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt8toInt8NoZeroPoint) {
values_quantized, output_scale,
output_zero_point, output_quantized);
}
#endif
#endif // defined(HIFIMINI)
#if !defined(XTENSA)
#if !defined(HIFIMINI)
// TODO(b/155682734): Hifimini optimized quantize requires input scale to be
// smaller than output scale.
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
@@ -288,7 +288,7 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
values_quantized, output_scale,
output_zero_point, output_quantized);
}
#endif
#endif // defined(HIFIMINI)
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt32) {
const int length = 10;

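Note the direction of the guard change in the hunks above: #if !defined(XTENSA) skipped these tests on every Xtensa build, but with the reference fallback in place only the HIFIMINI-optimized kernel keeps the input/output scale restriction, so the exclusion narrows to #if !defined(HIFIMINI).
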
tensorflow/lite/micro/kernels/xtensa/quantize.cc

@@ -23,12 +23,14 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/kernels/xtensa/fixedpoint_utils.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
#if defined(HIFIMINI)
struct OpData {
int32_t zero_point = 0;
int scale_multiplier = 0;
@@ -107,34 +109,7 @@ void AffineQuantize(int scale_multiplier, const int32_t zero_point,
}
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* op_data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
// TODO(b/155682734): Fix dangerous input/output scale ratio assumptions.
op_data->scale_multiplier =
CreateQConstantForInt24(0, input->params.scale / output->params.scale);
op_data->zero_point = output->params.zero_point;
op_data->input_zero_point = input->params.zero_point;
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &op_data->requantize_output_multiplier,
&op_data->requantize_output_shift);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus EvalHifimini(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* op_data = static_cast<OpData*>(node->user_data);
@@ -162,6 +137,54 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
return kTfLiteOk;
}
#endif // defined(HIFIMINI)
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
#if defined(HIFIMINI)
return context->AllocatePersistentBuffer(context, sizeof(OpData));
#else
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
#endif
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
#if defined(HIFIMINI)
auto* op_data = static_cast<OpData*>(node->user_data);
// TODO(b/155682734): Fix dangerous input/output scale ratio assumptions.
op_data->scale_multiplier =
CreateQConstantForInt24(0, input->params.scale / output->params.scale);
op_data->zero_point = output->params.zero_point;
#else
auto* op_data = static_cast<OpDataQuantizeReference*>(node->user_data);
op_data->quantization_params.zero_point = output->params.zero_point;
op_data->quantization_params.scale =
static_cast<double>(output->params.scale);
#endif
op_data->input_zero_point = input->params.zero_point;
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &op_data->requantize_output_multiplier,
&op_data->requantize_output_shift);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
#if defined(HIFIMINI)
return EvalHifimini(context, node);
#else
return EvalQuantizeReference(context, node);
#endif
}
} // namespace

tensorflow/lite/micro/tools/make/Makefile

@@ -325,6 +325,7 @@ tensorflow/lite/micro/kernels/pad.cc \
tensorflow/lite/micro/kernels/pooling.cc \
tensorflow/lite/micro/kernels/prelu.cc \
tensorflow/lite/micro/kernels/quantize.cc \
tensorflow/lite/micro/kernels/quantize_common.cc \
tensorflow/lite/micro/kernels/reduce.cc \
tensorflow/lite/micro/kernels/reshape.cc \
tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc \