Use xa_nnlib for the quantize kernel on Fusion F1.
Copied the relevant function call from commit a737c1e394, file tensorflow/lite/micro/kernels/xtensa_hifi/quantize.cc.
Latency for the first quantize op (int16->int8) in the keyword_benchmark
went from 3758 ticks to 800 ticks.
Overall latency went from 38516 ticks to 34253 ticks.
Tested with:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade OPTIMIZED_KERNEL_DIR=xtensa run_keyword_benchmark -j8
```
Full output (for completeness):
```
InitializeKeywordRunner took 160568 ticks (160 ms).
KeywordRunNIerations(1) took 34253 ticks (34 ms)
QUANTIZE took 800 ticks (0 ms).
SVDF took 4753 ticks (4 ms).
FULLY_CONNECTED took 1353 ticks (1 ms).
SVDF took 4211 ticks (4 ms).
FULLY_CONNECTED took 1353 ticks (1 ms).
SVDF took 3145 ticks (3 ms).
FULLY_CONNECTED took 1353 ticks (1 ms).
SVDF took 4211 ticks (4 ms).
FULLY_CONNECTED took 1353 ticks (1 ms).
SVDF took 2890 ticks (2 ms).
SVDF took 3583 ticks (3 ms).
SVDF took 3054 ticks (3 ms).
FULLY_CONNECTED took 1091 ticks (1 ms).
SOFTMAX took 749 ticks (0 ms).
QUANTIZE took 354 ticks (0 ms).
```
Also tested that the kernel test passes with:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade OPTIMIZED_KERNEL_DIR=xtensa test_kernel_quantize_test -j8
```
Progress towards http://b/177457688
This commit is contained in:
parent
63a277f28a
commit
45d9e41015
@ -49,7 +49,7 @@ void ValidateQuantizeGoldens(TfLiteTensor* tensors, int tensors_size,
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(HIFIMINI)
|
||||
#if !defined(XTENSA)
|
||||
template <typename T>
|
||||
void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
|
||||
const int* output_dims_data, const float* golden,
|
||||
@ -79,7 +79,7 @@ void TestQuantizeFloat(const int* input_dims_data, const float* input_data,
|
||||
ValidateQuantizeGoldens(tensors, tensors_size, golden, golden_quantized,
|
||||
scale, zero_point, output_dims_count, output_data);
|
||||
}
|
||||
#endif // defined(HIFIMINI)
|
||||
#endif // defined(XTENSA)
|
||||
|
||||
template <typename InputType, typename OutputType>
|
||||
void TestRequantize(const int* input_dims_data, const float* input_data,
|
||||
@ -121,7 +121,7 @@ void TestRequantize(const int* input_dims_data, const float* input_data,
|
||||
|
||||
TF_LITE_MICRO_TESTS_BEGIN
|
||||
|
||||
#if !defined(HIFIMINI)
|
||||
#if !defined(XTENSA)
|
||||
TF_LITE_MICRO_TEST(QuantizeOpTestUint8) {
|
||||
const int length = 10;
|
||||
const int dims[] = {2, 2, 5};
|
||||
@ -267,9 +267,9 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt8toInt8NoZeroPoint) {
|
||||
values_quantized, output_scale,
|
||||
output_zero_point, output_quantized);
|
||||
}
|
||||
#endif // defined(HIFIMINI)
|
||||
#endif // defined(XTENSA)
|
||||
|
||||
#if !defined(HIFIMINI)
|
||||
#if !defined(XTENSA)
|
||||
// TODO(b/155682734): Hifimini optimized quantize requires input scale to be
|
||||
// smaller then output scale.
|
||||
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
|
||||
@ -288,7 +288,7 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) {
|
||||
values_quantized, output_scale,
|
||||
output_zero_point, output_quantized);
|
||||
}
|
||||
#endif // defined(HIFIMINI)
|
||||
#endif // defined(XTENSA)
|
||||
|
||||
TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt32) {
|
||||
const int length = 10;
|
||||
|
@ -109,25 +109,55 @@ void AffineQuantize(int scale_multiplier, const int32_t zero_point,
|
||||
}
|
||||
}
|
||||
|
||||
TfLiteStatus EvalHifimini(TfLiteContext* context, TfLiteNode* node) {
|
||||
#endif // defined(HIFIMINI)
|
||||
|
||||
TfLiteStatus EvalXtensa(TfLiteContext* context, TfLiteNode* node) {
|
||||
TFLITE_DCHECK(node->user_data != nullptr);
|
||||
#if defined(HIFIMINI)
|
||||
auto* op_data = static_cast<OpData*>(node->user_data);
|
||||
#elif defined(FUSION_F1)
|
||||
auto* op_data = static_cast<OpDataQuantizeReference*>(node->user_data);
|
||||
#endif
|
||||
|
||||
const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
|
||||
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
|
||||
|
||||
if (output->type == kTfLiteInt8 && input->type == kTfLiteInt16) {
|
||||
#if defined(HIFIMINI)
|
||||
AffineQuantize(op_data->scale_multiplier, op_data->zero_point,
|
||||
tflite::micro::GetTensorShape(input),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
tflite::micro::GetTensorShape(output),
|
||||
tflite::micro::GetTensorData<int8_t>(output));
|
||||
#elif defined(FUSION_F1)
|
||||
int size = ElementCount(*input->dims);
|
||||
TF_LITE_ENSURE_EQ(
|
||||
context,
|
||||
xa_nn_elm_quantize_asym16s_asym8s(
|
||||
tflite::micro::GetTensorData<int8_t>(output),
|
||||
tflite::micro::GetTensorData<int16_t>(input),
|
||||
op_data->input_zero_point, op_data->quantization_params.zero_point,
|
||||
op_data->requantize_output_shift,
|
||||
op_data->requantize_output_multiplier, size),
|
||||
0);
|
||||
#else
|
||||
static_assert(false, "Unsupported xtensa architecture.");
|
||||
#endif
|
||||
} else if (output->type == kTfLiteInt32 && input->type == kTfLiteInt16) {
|
||||
int size = ElementCount(*input->dims);
|
||||
|
||||
// This ifdef is only needed because the hifimini code is not following the
|
||||
// convention of the rest of the codebase. Ideally we would be using the
|
||||
// same structs as much as possible and reduce the need for such ifdefs.
|
||||
#if defined(HIFIMINI)
|
||||
int32_t zero_point = op_data->zero_point;
|
||||
#elif defined(FUSION_F1)
|
||||
int32_t zero_point = op_data->quantization_params.zero_point;
|
||||
#endif
|
||||
reference_ops::Requantize(tflite::micro::GetTensorData<int16_t>(input),
|
||||
size, op_data->requantize_output_multiplier,
|
||||
op_data->requantize_output_shift,
|
||||
op_data->input_zero_point, op_data->zero_point,
|
||||
op_data->input_zero_point, zero_point,
|
||||
tflite::micro::GetTensorData<int32_t>(output));
|
||||
} else {
|
||||
TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.",
|
||||
@ -137,7 +167,6 @@ TfLiteStatus EvalHifimini(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
return kTfLiteOk;
|
||||
}
|
||||
#endif // defined(HIFIMINI)
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
|
||||
@ -179,8 +208,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
}
|
||||
|
||||
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
|
||||
#if defined(HIFIMINI)
|
||||
return EvalHifimini(context, node);
|
||||
#if defined(HIFIMINI) || defined(FUSION_F1)
|
||||
return EvalXtensa(context, node);
|
||||
#else
|
||||
return EvalQuantizeReference(context, node);
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user