Refactor quantize (reference and xtensa) for a complete fallback.

Without this change, we cannot build the quantize kernel for the Vision P6.

A different PR makes the additional changes needed to build all the kernels
for the Vision P6; that work was separated out into its own PR to keep
individual PRs smaller and focused.
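
In code terms, the refactor moves the reference Eval into a shared
quantize_common.cc / quantize.h pair and leaves only the HiFi mini
specialization behind an ifdef in the xtensa kernel. A condensed sketch of the
resulting dispatch (see the xtensa quantize.cc diff below for the actual code):

```cpp
// Condensed from the xtensa quantize.cc changes in this PR: HiFi mini keeps
// its optimized path, every other xtensa core (e.g. Fusion F1, Vision P6)
// falls back to the shared reference implementation.
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
#if defined(HIFIMINI)
  return EvalHifimini(context, node);           // optimized HiFi mini kernel
#else
  return EvalQuantizeReference(context, node);  // shared reference fallback
#endif
}
```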

Tested:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=Google_F1 test_keyword_benchmark -j8
```

Gives:
```
InitializeKeywordRunner() took 239589 ticks (239 ms)
KeywordRunNIerations(1) took 171203 ticks (171 ms)
KeywordRunNIerations(10) took 1711501 ticks (1711 ms)
```

This is a small increase in latency, but it is not a concern since we are
still only using the reference implementation.

The kernel test also passes for fusion_f1 (13 cases) and hifimini (2 cases):

```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=Google_F1 test_kernel_quantize_test -j8
make -f tensorflow/lite/micro/tools/make/Makefile -j8 TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=hifimini XTENSA_CORE=mini1m1m_RG test_kernel_quantize_test
```
Advait Jain 2020-12-11 16:30:11 -08:00
parent 6d8366afc4
commit 5f153decbc
7 changed files with 228 additions and 147 deletions

tensorflow/lite/micro/kernels/BUILD

@@ -117,6 +117,7 @@ cc_library(
"pad.cc",
"pooling.cc",
"prelu.cc",
"quantize_common.cc",
"reduce.cc",
"reshape.cc",
"resize_nearest_neighbor.cc",
@@ -147,6 +148,7 @@ cc_library(
}),
hdrs = [
"micro_ops.h",
"quantize.h",
"svdf.h",
],
copts = micro_copts(),

tensorflow/lite/micro/kernels/quantize.cc

@@ -12,37 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
struct OpData {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t output_multiplier;
int output_shift;
int32_t input_zero_point;
};
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
@@ -77,8 +67,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &data->output_multiplier,
&data->output_shift);
QuantizeMultiplier(effective_scale, &data->requantize_output_multiplier,
&data->requantize_output_shift);
}
data->quantization_params.zero_point = output->params.zero_point;
@@ -88,107 +78,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
OpData* data = static_cast<OpData*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->output_multiplier, data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(tflite::micro::GetTensorData<int8_t>(input),
size, data->output_multiplier,
data->output_shift, data->input_zero_point,
data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace
TfLiteRegistration Register_QUANTIZE() {
return {/*init=*/Init,
/*free=*/nullptr,
/*prepare=*/Prepare,
/*invoke=*/Eval,
/*invoke=*/EvalQuantizeReference,
/*profiling_string=*/nullptr,
/*builtin_code=*/0,
/*custom_name=*/nullptr,

tensorflow/lite/micro/kernels/quantize.h

@@ -0,0 +1,37 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
namespace tflite {
struct OpDataQuantizeReference {
tflite::QuantizationParams quantization_params;
// The scaling factor from input to output (aka the 'real multiplier') can
// be represented as a fixed point multiplier plus a left shift.
int32_t requantize_output_multiplier;
int requantize_output_shift;
int32_t input_zero_point;
};
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node);
} // namespace tflite
#endif // TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
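
The "fixed point multiplier plus a left shift" stored in
requantize_output_multiplier / requantize_output_shift is produced by
QuantizeMultiplier() during Prepare. The sketch below is illustrative only
(not the TFLite implementation) and shows what that decomposition means:

```cpp
// Illustrative sketch: represent a real requantization multiplier as
// real_multiplier ~= quantized_multiplier * 2^(shift - 31), which is the
// shape of the data OpDataQuantizeReference carries. Not the TFLite code.
#include <cmath>
#include <cstdint>
#include <cstdio>

void DecomposeMultiplier(double real_multiplier, int32_t* quantized, int* shift) {
  if (real_multiplier == 0.0) {
    *quantized = 0;
    *shift = 0;
    return;
  }
  // frexp: real_multiplier = q * 2^shift with q in [0.5, 1).
  const double q = std::frexp(real_multiplier, shift);
  // Round q into Q31 fixed point: quantized / 2^31 ~= q.
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // q rounded up to exactly 1.0.
    q_fixed /= 2;
    ++(*shift);
  }
  *quantized = static_cast<int32_t>(q_fixed);
}

int main() {
  // Example: input scale 0.5, output scale 2.0 -> effective scale 0.25.
  int32_t multiplier = 0;
  int shift = 0;
  DecomposeMultiplier(0.5 / 2.0, &multiplier, &shift);
  // Prints: multiplier=1073741824 shift=-1  (i.e. 0.5 * 2^-1 == 0.25).
  std::printf("multiplier=%d shift=%d\n", static_cast<int>(multiplier), shift);
  return 0;
}
```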

tensorflow/lite/micro/kernels/quantize_common.cc

@@ -0,0 +1,123 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* data =
static_cast<OpDataQuantizeReference*>(node->user_data);
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
if (input->type == kTfLiteFloat32) {
switch (output->type) {
case kTfLiteInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteUInt8:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output));
break;
case kTfLiteInt16:
reference_ops::AffineQuantize(
data->quantization_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<float>(input),
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt16) {
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
case kTfLiteInt16:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int16_t>(output));
return kTfLiteOk;
case kTfLiteInt32:
reference_ops::Requantize(
tflite::micro::GetTensorData<int16_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int32_t>(output));
return kTfLiteOk;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else if (input->type == kTfLiteInt8) {
// Int8 to Int8 requantization, required if the input and output tensors
// have different scales and/or zero points.
size_t size = ElementCount(*input->dims);
switch (output->type) {
case kTfLiteInt8:
reference_ops::Requantize(
tflite::micro::GetTensorData<int8_t>(input), size,
data->requantize_output_multiplier, data->requantize_output_shift,
data->input_zero_point, data->quantization_params.zero_point,
tflite::micro::GetTensorData<int8_t>(output));
break;
default:
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
} else {
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
TfLiteTypeGetName(input->type),
TfLiteTypeGetName(output->type));
return kTfLiteError;
}
return kTfLiteOk;
}
} // namespace tflite

tensorflow/lite/micro/kernels/quantize_test.cc

@@ -49,7 +49,7 @@ void ValidateQuantizeGoldens(TfLiteTensor* tensors, int tensors_size,
}
}
#if !defined(XTENSA)
#if !defined(HIFIMINI)
template <typename T>
void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
const int* output_dims_data, const float* golden,
@@ -79,7 +79,7 @@ void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
ValidateQuantizeGoldens(tensors, tensors_size, golden, golden_quantized,
scale, zero_point, output_dims_count, output_data);
}
#endif
#endif // defined(HIFIMINI)
template <typename InputType, typename OutputType>
void TestRequantize(const int* input_dims_data, const float* input_data,
@@ -121,7 +121,7 @@ void TestRequantize(const int* input_dims_data, const float* input_data,
TF_LITE_MICRO_TESTS_BEGIN
#if !defined(XTENSA)
#if !defined(HIFIMINI)
TF_LITE_MICRO_TEST(QuantizeOpTestUint8) {
const int length = 10;
const int dims[] = {2, 2, 5};
@@ -267,9 +267,9 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt8toInt8NoZeroPoint) {
values_quantized, output_scale,
output_zero_point, output_quantized);
}
#endif
#endif // defined(HIFIMINI)
#if !defined(XTENSA)
#if !defined(HIFIMINI)
// TODO(b/155682734): Hifimini optimized quantize requires input scale to be
// smaller then output scale.
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
@@ -288,7 +288,7 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
values_quantized, output_scale,
output_zero_point, output_quantized);
}
#endif
#endif // defined(HIFIMINI)
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt32) {
const int length = 10;

tensorflow/lite/micro/kernels/xtensa/quantize.cc

@@ -13,22 +13,23 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include <xtensa/tie/xt_hifi2.h>
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/quantize.h"
#include "tensorflow/lite/kernels/internal/reference/requantize.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/quantize.h"
#include "tensorflow/lite/micro/kernels/xtensa/fixedpoint_utils.h"
#include "tensorflow/lite/micro/micro_utils.h"
namespace tflite {
namespace {
#if defined(HIFIMINI)
struct OpData {
int32_t zero_point = 0;
int scale_multiplier = 0;
@@ -107,34 +108,7 @@ void AffineQuantize(int scale_multiplier, const int32_t zero_point,
}
}
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
return context->AllocatePersistentBuffer(context, sizeof(OpData));
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* op_data = static_cast<OpData*>(node->user_data);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
// TODO(b/155682734): Fix dangerous input/output scale ratio assumptions.
op_data->scale_multiplier =
CreateQConstantForInt24(0, input->params.scale / output->params.scale);
op_data->zero_point = output->params.zero_point;
op_data->input_zero_point = input->params.zero_point;
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &op_data->requantize_output_multiplier,
&op_data->requantize_output_shift);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
TfLiteStatus EvalHifimini(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
auto* op_data = static_cast<OpData*>(node->user_data);
@@ -162,6 +136,54 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
}
return kTfLiteOk;
}
#endif // defined(HIFIMINI)
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
#if defined(HIFIMINI)
return context->AllocatePersistentBuffer(context, sizeof(OpData));
#else
return context->AllocatePersistentBuffer(context,
sizeof(OpDataQuantizeReference));
#endif
}
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TFLITE_DCHECK(node->user_data != nullptr);
TfLiteTensor* output = GetOutput(context, node, 0);
const TfLiteTensor* input = GetInput(context, node, 0);
#if defined(HIFIMINI)
auto* op_data = static_cast<OpData*>(node->user_data);
// TODO(b/155682734): Fix dangerous input/output scale ratio assumptions.
op_data->scale_multiplier =
CreateQConstantForInt24(0, input->params.scale / output->params.scale);
op_data->zero_point = output->params.zero_point;
#else
auto* op_data = static_cast<OpDataQuantizeReference*>(node->user_data);
op_data->quantization_params.zero_point = output->params.zero_point;
op_data->quantization_params.scale =
static_cast<double>(output->params.scale);
#endif
op_data->input_zero_point = input->params.zero_point;
double effective_scale = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(effective_scale, &op_data->requantize_output_multiplier,
&op_data->requantize_output_shift);
return kTfLiteOk;
}
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
#if defined(HIFIMINI)
return EvalHifimini(context, node);
#else
return EvalQuantizeReference(context, node);
#endif
}
} // namespace

tensorflow/lite/micro/tools/make/Makefile

@@ -325,6 +325,7 @@ tensorflow/lite/micro/kernels/pad.cc \
tensorflow/lite/micro/kernels/pooling.cc \
tensorflow/lite/micro/kernels/prelu.cc \
tensorflow/lite/micro/kernels/quantize.cc \
tensorflow/lite/micro/kernels/quantize_common.cc \
tensorflow/lite/micro/kernels/reduce.cc \
tensorflow/lite/micro/kernels/reshape.cc \
tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc \