Merge pull request #45618 from advaitjain:xtensa-quantize-refactor

PiperOrigin-RevId: 347469025
Change-Id: I210f2a803c382e33d04c37abd439a776e092e687
TensorFlower Gardener 2020-12-14 14:30:19 -08:00
commit fe6d0cf3f9
7 changed files with 227 additions and 145 deletions

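Summary of the refactor (all taken from the hunks below): the portable quantize kernel moves out of quantize.cc into a new quantize_common.cc, its op data struct is renamed to OpDataQuantizeReference, and its eval entry point is exported as EvalQuantizeReference through a new quantize.h. The Xtensa port keeps its HIFIMINI-optimized path behind #if defined(HIFIMINI) and falls back to the shared reference kernel on all other targets; the tests narrow their guards from #if !defined(XTENSA) to #if !defined(HIFIMINI) to match.
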
tensorflow/lite/micro/kernels/BUILD

@@ -117,6 +117,7 @@ cc_library(
"pad.cc",
"pooling.cc",
"prelu.cc",
"quantize_common.cc",
"reduce.cc",
"reshape.cc",
"resize_nearest_neighbor.cc",
@@ -147,6 +148,7 @@ cc_library(
}),
hdrs = [
"micro_ops.h",
"quantize.h",
"svdf.h",
],
copts = micro_copts(),

tensorflow/lite/micro/kernels/quantize.cc

@@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
@@ -25,24 +25,15 @@ limitations under the License.
namespace tflite {
namespace {
struct OpData {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t input_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -77,8 +68,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &data->output_multiplier,
&data->output_shift);
QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
&data->requantize_output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
@@ -88,107 +79,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_QUANTIZE() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*invoke=*/EvalQuantizeReference,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,

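Register_QUANTIZE() is the hook the op resolver uses to wire this kernel into a graph. A hedged usage sketch, assuming the MicroMutableOpResolver API roughly as it existed at the time (the capacity argument and helper name here are illustrative only):

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Sketch only: register the quantize kernel with a micro op resolver.
// AddQuantize() routes to Register_QUANTIZE() internally.
void RegisterOps(tflite::MicroMutableOpResolver<1>& resolver) {
  resolver.AddQuantize();  // <1> reserves capacity for a single op
}
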
tensorflow/lite/micro/kernels/quantize.h

@@ -0,0 +1,37 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
namespace tflite {
struct OpDataQuantizeReference {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t requantize_output_multiplier;
int requantize_output_shift;
int32_t input_zero_point;
};
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_

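The "fixed point multiplier plus a left shift" comment above is the standard TFLite device for applying a real-valued rescale with integer-only arithmetic: QuantizeMultiplier() splits the double into a Q31 mantissa and a power-of-two exponent. A self-contained sketch of that decomposition, assuming round-to-nearest and ignoring the zero-multiplier edge case the real helper also handles:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch only: express real_multiplier as quantized * 2^(shift - 31),
// with quantized a Q31 value in [2^30, 2^31).
void DecomposeMultiplier(double real_multiplier, int32_t* quantized,
                         int* shift) {
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  int64_t q_fixed = static_cast<int64_t>(std::lround(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding carried q up to exactly 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q_fixed);
}

int main() {
  int32_t multiplier;
  int shift;
  // Effective scale 0.25, e.g. requantizing from scale 0.5 to scale 2.0.
  DecomposeMultiplier(0.25, &multiplier, &shift);
  // Expect multiplier == 1 << 30 (0.5 in Q31) and shift == -1,
  // since 0.25 == 0.5 * 2^-1.
  std::printf("multiplier=%ld shift=%d\n", static_cast<long>(multiplier),
              shift);
  return 0;
}
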
tensorflow/lite/micro/kernels/quantize_common.cc

@@ -0,0 +1,122 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace tflite

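Each Requantize() call above maps values from one (scale, zero_point) pair to another. A hedged, float-based sketch of the per-element arithmetic; the real kernel performs the same dataflow in saturating fixed-point math using the requantize_output_multiplier/shift pair computed in Prepare():

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch only: requantize one int16 value to int8. Subtract the input
// zero point, rescale by input_scale / output_scale, add the output
// zero point, then clamp to the int8 range.
int8_t RequantizeOne(int16_t in, double effective_scale,
                     int32_t input_zero_point, int32_t output_zero_point) {
  const double scaled = (in - input_zero_point) * effective_scale;
  const int32_t out =
      static_cast<int32_t>(std::lround(scaled)) + output_zero_point;
  return static_cast<int8_t>(
      std::max<int32_t>(-128, std::min<int32_t>(127, out)));
}
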
tensorflow/lite/micro/kernels/quantize_test.cc

@@ -49,7 +49,7 @@ void ValidateQuantizeGoldens(TfLiteTensor* tensors, int tensors_size,
}
}
#if !defined(XTENSA)
#if !defined(HIFIMINI)
template <typename T>
void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
const int* output_dims_data, const float* golden,
@@ -79,7 +79,7 @@ void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
ValidateQuantizeGoldens(tensors, tensors_size, golden, golden_quantized,
scale, zero_point, output_dims_count, output_data);
}
#endif
#endif // defined(HIFIMINI)
template <typename InputType, typename OutputType>
void TestRequantize(const int* input_dims_data, const float* input_data,
@@ -121,7 +121,7 @@ void TestRequantize(const int* input_dims_data, const float* input_data,
TF_LITE_MICRO_TESTS_BEGIN
#if !defined(XTENSA)
#if !defined(HIFIMINI)
TF_LITE_MICRO_TEST(QuantizeOpTestUint8) {
const int length = 10;
const int dims[] = {2, 2, 5};
@@ -267,9 +267,9 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt8toInt8NoZeroPoint) {
values_quantized, output_scale,
output_zero_point, output_quantized);
}
#endif
#endif // defined(HIFIMINI)
#if !defined(XTENSA)
#if !defined(HIFIMINI)
// TODO(b/155682734): Hifimini optimized quantize requires input scale to be
// smaller than output scale.
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
@@ -288,7 +288,7 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
values_quantized, output_scale,
output_zero_point, output_quantized);
}
#endif
#endif // defined(HIFIMINI)
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt32) {
const int length = 10;

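Note the direction of the guard change in the hunks above: #if !defined(XTENSA) skipped these tests on every Xtensa build, but with the reference fallback in place only the HIFIMINI-optimized kernel keeps the input/output scale restriction, so the exclusion narrows to #if !defined(HIFIMINI).
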
tensorflow/lite/micro/kernels/xtensa/quantize.cc

@@ -23,12 +23,14 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/kernels/xtensa/fixedpoint_utils.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
#if defined(HIFIMINI)
struct OpData {
int32_t zero_point = 0;
int scale_multiplier = 0;
@@ -107,34 +109,7 @@ void AffineQuantize(int scale_multiplier, const int32_t zero_point,
}
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* op_data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
// TODO(b/155682734): Fix dangerous input/output scale ratio assumptions.
op_data->scale_multiplier =
CreateQConstantForInt24(0, input->params.scale / output->params.scale);
op_data->zero_point = output->params.zero_point;
op_data->input_zero_point = input->params.zero_point;
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &op_data->requantize_output_multiplier,
&op_data->requantize_output_shift);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus EvalHifimini(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* op_data = static_cast<OpData*>(node->user_data);
@@ -162,6 +137,54 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
return kTfLiteOk;
}
#endif // defined(HIFIMINI)
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
#if defined(HIFIMINI)
return context->AllocatePersistentBuffer(context, sizeof(OpData));
#else
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
#endif
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
#if defined(HIFIMINI)
auto* op_data = static_cast<OpData*>(node->user_data);
// TODO(b/155682734): Fix dangerous input/output scale ratio assumptions.
op_data->scale_multiplier =
CreateQConstantForInt24(0, input->params.scale / output->params.scale);
op_data->zero_point = output->params.zero_point;
#else
auto* op_data = static_cast<OpDataQuantizeReference*>(node->user_data);
op_data->quantization_params.zero_point = output->params.zero_point;
op_data->quantization_params.scale =
static_cast<double>(output->params.scale);
#endif
op_data->input_zero_point = input->params.zero_point;
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &op_data->requantize_output_multiplier,
&op_data->requantize_output_shift);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
#if defined(HIFIMINI)
return EvalHifimini(context, node);
#else
return EvalQuantizeReference(context, node);
#endif
}
} // namespace

tensorflow/lite/micro/tools/make/Makefile

@@ -325,6 +325,7 @@ tensorflow/lite/micro/kernels/pad.cc \
tensorflow/lite/micro/kernels/pooling.cc \
tensorflow/lite/micro/kernels/prelu.cc \
tensorflow/lite/micro/kernels/quantize.cc \
tensorflow/lite/micro/kernels/quantize_common.cc \
tensorflow/lite/micro/kernels/reduce.cc \
tensorflow/lite/micro/kernels/reshape.cc \
tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc \