HiFi Mini quantize optimized kernel.
PiperOrigin-RevId: 292217634
Change-Id: Idb599aaaaebf59464cbe713cc5fb951c6922d684

commit 314c578ddd
parent 51b03bfa63
@@ -217,6 +217,20 @@ inline void QuantizeMultiplier(double double_multiplier,
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

//
// Converts a floating-point number to a fixed-point Q representation for
// 24-bit integers.
//
inline int CreateQConstantForInt24(int integer_bits, float f) {
  const double min_bounds = static_cast<double>(INT24_MIN);
  const double max_bounds = static_cast<double>(INT24_MAX);

  int fractional_bits = 23 - integer_bits;
  double raw = std::round(f * static_cast<double>(1 << fractional_bits));
  raw = std::max(raw, min_bounds);
  raw = std::min(raw, max_bounds);
  return static_cast<int>(raw);
}

}  // namespace hifimini
}  // namespace xtensa
}  // namespace micro
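For intuition, the conversion above can be exercised off-target in plain C++. The block below is a minimal sketch, not part of this change: kInt24Min/kInt24Max and the main() driver are illustrative stand-ins for the INT24 macros. CreateQConstantForInt24(0, f) computes round(f * 2^23) saturated to the signed 24-bit range, which is how Prepare in the new kernel encodes 1 / output_scale as a Q0.23 multiplier.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Illustrative 24-bit two's-complement bounds (stand-ins for INT24_MIN/MAX).
constexpr double kInt24Min = -8388608.0;  // -2^23
constexpr double kInt24Max = 8388607.0;   //  2^23 - 1

// Mirror of CreateQConstantForInt24: a Q<i>.<23-i> constant stores
// f * 2^(23 - integer_bits), rounded and saturated to the 24-bit range.
int CreateQConstantForInt24(int integer_bits, float f) {
  const int fractional_bits = 23 - integer_bits;
  double raw = std::round(f * static_cast<double>(1 << fractional_bits));
  raw = std::max(raw, kInt24Min);
  raw = std::min(raw, kInt24Max);
  return static_cast<int>(raw);
}

int main() {
  // An output scale of 4.0 gives 1/scale = 0.25 -> 0.25 * 2^23 = 2097152.
  std::printf("%d\n", CreateQConstantForInt24(0, 0.25f));  // prints 2097152
  // Values of 1/scale >= 1.0 saturate in Q0.23:
  std::printf("%d\n", CreateQConstantForInt24(0, 2.0f));   // prints 8388607
  return 0;
}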
tensorflow/lite/micro/kernels/xtensa-hifimini/quantize.cc (new file, 178 lines)
@@ -0,0 +1,178 @@
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/reference/quantize.h"

#include <xtensa/tie/xt_hifi2.h>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/xtensa-hifimini/fixedpoint_utils.h"
#include "tensorflow/lite/micro/kernels/xtensa-hifimini/utils.h"

namespace tflite {
namespace ops {
namespace micro {

namespace xtensa {
namespace hifimini {

void AffineQuantize(int scale_multiplier,
                    const tflite::QuantizationParams& op_params,
                    const RuntimeShape& input_shape, const int16_t* input_data,
                    const RuntimeShape& output_shape, int8_t* output_data) {
  const int32 zero_point = op_params.zero_point;
  const int flat_size = MatchingFlatSize(input_shape, output_shape);
  ae_q56s min_val_56 = AE_CVTQ48A32S(INT8_MIN);
  ae_q56s max_val_56 = AE_CVTQ48A32S(INT8_MAX);
  ae_q56s zero_point_56 = AE_CVTQ48A32S(zero_point);

  const ae_p16x2s* input_data_ptr = (const ae_p16x2s*)(input_data - 2);

  ae_p24x2s scale_multiplier_24x2 = AE_CONVERT_INT32_24x2(scale_multiplier);

  int iters = flat_size / 2;
  for (int i = 0; i < iters; i++) {
    // Load two 16bit pairs into the 2x24bit register PR:
    // Values need to be right shifted 8 bits to align from upper 16bits to a
    // 24bit value:
    ae_p24x2s inputs_24x2;
    AE_LP16X2F_IU(inputs_24x2, input_data_ptr, 4);
    inputs_24x2 = AE_P24X2S_SRAI(inputs_24x2, 8);

    // Q0.23 * Q16.0 == Q16.23
    ae_q56s sum_56 = AE_ZEROQ56();

    {
      AE_MULAS56P24S_HH(sum_56, scale_multiplier_24x2, inputs_24x2);

      // Q16.23 -> Q16.0
      // Shift right only 7 bits (23 - 16). This truncated shift aligns the
      // 16bit value at the truncation line for 32bit in the QR register. The
      // lower 16 bits will be used for rounding in AE_ROUNDSQ32SYM.
      sum_56 = AE_Q56S_SRAI(sum_56, 23 - 16);

      // Round and truncate to 32 bits:
      sum_56 = AE_ROUNDSQ32SYM(sum_56);

      // Add the offset (zero_point_56 is already aligned at 32 bits):
      sum_56 = AE_ADDQ56(sum_56, zero_point_56);

      // Saturate to the int8 output range:
      sum_56 = AE_MINQ56S(sum_56, max_val_56);
      sum_56 = AE_MAXQ56S(sum_56, min_val_56);

      output_data[i * 2] = static_cast<int8_t>(AE_TRUNCA32Q48(sum_56));
    }

    sum_56 = AE_ZEROQ56();
    {
      AE_MULAS56P24S_LL(sum_56, scale_multiplier_24x2, inputs_24x2);

      // Q16.23 -> Q16.0
      // Shift right only 7 bits (23 - 16). This truncated shift aligns the
      // 16bit value at the truncation line for 32bit in the QR register. The
      // lower 16 bits will be used for rounding in AE_ROUNDSQ32SYM.
      sum_56 = AE_Q56S_SRAI(sum_56, 23 - 16);

      // Round and truncate to 32 bits:
      sum_56 = AE_ROUNDSQ32SYM(sum_56);

      // Add the offset (zero_point_56 is already aligned at 32 bits):
      sum_56 = AE_ADDQ56(sum_56, zero_point_56);

      // Saturate to the int8 output range:
      sum_56 = AE_MINQ56S(sum_56, max_val_56);
      sum_56 = AE_MAXQ56S(sum_56, min_val_56);

      output_data[i * 2 + 1] = static_cast<int8_t>(AE_TRUNCA32Q48(sum_56));
    }
  }
}

}  // namespace hifimini
}  // namespace xtensa

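Stripped of the intrinsics, each element above goes through multiply-by-Q0.23, a rounding shift back to integer, zero-point addition, and saturation. The scalar sketch below is a behavioral reference only, under the assumption that the truncating shift plus AE_ROUNDSQ32SYM together behave like round-half-away-from-zero; ScalarAffineQuantize is an illustrative name, not the shipped kernel.

#include <algorithm>
#include <cstdint>

// Behavioral sketch of the per-element pipeline in AffineQuantize above.
// scale_multiplier is the Q0.23 encoding of 1 / output_scale produced by
// CreateQConstantForInt24(0, 1.f / scale).
void ScalarAffineQuantize(int scale_multiplier, int32_t zero_point,
                          const int16_t* input, int size, int8_t* output) {
  for (int i = 0; i < size; ++i) {
    // Q16.0 * Q0.23 -> Q16.23, held in a 64-bit accumulator.
    const int64_t product = static_cast<int64_t>(input[i]) * scale_multiplier;
    // Q16.23 -> Q16.0, rounding half away from zero (approximates the
    // truncating shift plus AE_ROUNDSQ32SYM in the vector code).
    const int64_t rounded =
        product >= 0 ? (product + (INT64_C(1) << 22)) >> 23
                     : -((-product + (INT64_C(1) << 22)) >> 23);
    // Add the output zero point, then saturate to the int8 range.
    int64_t result = rounded + zero_point;
    result = std::min<int64_t>(result, INT8_MAX);
    result = std::max<int64_t>(result, INT8_MIN);
    output[i] = static_cast<int8_t>(result);
  }
}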
namespace quantize {

struct OpData {
  int scale_multiplier = 0;
};

static OpData kStaticOpData;

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  return nullptr;
}

void Free(TfLiteContext* context, void* buffer) {}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  TfLiteTensor* output = &context->tensors[node->outputs->data[0]];

  // TODO(b/132070898): Use statically slotted OpData structures until a
  // scratch memory API is ready.
  OpData* op_data = &kStaticOpData;
  node->user_data = op_data;

  op_data->scale_multiplier =
      xtensa::hifimini::CreateQConstantForInt24(0, 1.f / output->params.scale);

  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* op_data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
  TfLiteTensor* output = &context->tensors[node->outputs->data[0]];

  tflite::QuantizationParams op_params;
  op_params.zero_point = output->params.zero_point;
  op_params.scale = static_cast<double>(output->params.scale);

  if (input->type != kTfLiteInt16 || output->type != kTfLiteInt8) {
    context->ReportError(context, "Input %s, output %s not supported.",
                         TfLiteTypeGetName(input->type),
                         TfLiteTypeGetName(output->type));
    return kTfLiteError;
  }

  xtensa::hifimini::AffineQuantize(
      op_data->scale_multiplier, op_params, GetTensorShape(input),
      GetTensorData<int16_t>(input), GetTensorShape(output),
      GetTensorData<int8_t>(output));
  return kTfLiteOk;
}

}  // namespace quantize

// This Op (QUANTIZE) quantizes the input and produces quantized output. On
// this target, AffineQuantize uses the output tensor's scale and zero point
// to requantize int16 input values to int8 output.
TfLiteRegistration* Register_QUANTIZE() {
  static TfLiteRegistration r = {};
  r.init = quantize::Init;
  r.free = quantize::Free;
  r.prepare = quantize::Prepare;
  r.invoke = quantize::Eval;
  return &r;
}

}  // namespace micro
}  // namespace ops
}  // namespace tflite
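For context beyond this diff: a registration returned by Register_QUANTIZE() is typically wired into a TFLM graph through an op resolver. The snippet below is a hedged sketch, assuming the MicroMutableOpResolver::AddBuiltin(builtin, registration) overload available in TFLM of the same era; the surrounding application code (RegisterQuantizeKernel) is illustrative, not part of this change.

#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

// Illustrative wiring only: make the optimized QUANTIZE kernel visible to
// the TFLM interpreter via an op resolver.
void RegisterQuantizeKernel(tflite::MicroMutableOpResolver* resolver) {
  resolver->AddBuiltin(tflite::BuiltinOperator_QUANTIZE,
                       tflite::ops::micro::Register_QUANTIZE());
}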