Optimize elementwise addition op handler
PiperOrigin-RevId: 277933045 Change-Id: If1c9e8f823db8da6045810956de54b04b25eed90
parent d73d9a237e
commit 60f77c5186
@@ -58,6 +58,9 @@ struct OpData {
   int32 input1_offset;
   int32 input2_offset;
   int32 output_offset;
+  int32 combined_offset;
+  int32 multiplier1;
+  int32 multiplier2;
 };
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -99,6 +102,31 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   data->input2_offset = -input2->params.zero_point;
   data->output_offset = output->params.zero_point;
   data->left_shift = 20;
+  // Simplify elementwise addition.
+  // This part of the code adds the entries of two quantized tensors as follows:
+  // x1 = (q1 - z1)s1
+  // x2 = (q2 - z2)s2
+  // y = (x1 + x2)/s + z
+  // s1, s2 and s are constant scales for the input and output tensors.
+  // z1, z2 and z are constant zero points.
+  // The above equation can be rewritten as
+  // y = s1.q1/s - s1.z1/s + s2.q2/s - s2.z2/s + z
+  // and, rearranging,
+  // y = q1.s1/s + q2.s2/s - s1.z1/s - s2.z2/s + z
+  // Setting mul1 = s1/s, mul2 = s2/s and combined_offset = -s1.z1/s - s2.z2/s + z:
+  // y = q1.mul1 + q2.mul2 + combined_offset.
+  // These operations are performed with fixed-point (12.20) arithmetic.
+  const float mul1 = (input1->params.scale / output->params.scale);
+  const float mul2 = (input2->params.scale / output->params.scale);
+  const int32 mul1_fixed = static_cast<int32>(mul1 * (1 << data->left_shift));
+  const int32 mul2_fixed = static_cast<int32>(mul2 * (1 << data->left_shift));
+  const int32 combined_offset = static_cast<int32>(
+      (output->params.zero_point << data->left_shift) -
+      (input1->params.zero_point << data->left_shift) * mul1 -
+      (input2->params.zero_point << data->left_shift) * mul2);
+  data->combined_offset = combined_offset;
+  data->multiplier1 = mul1_fixed;
+  data->multiplier2 = mul2_fixed;
   const double twice_max_input_scale =
       2 * std::max(input1->params.scale, input2->params.scale);
   const double real_input1_multiplier =
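As a quick sanity check of the derivation in the new comment block, the following standalone sketch (not part of the commit; the scales, zero points and sample values are made up) compares the float reference formula against the simplified fixed-point path built with the same mul1/mul2/combined_offset construction:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Made-up quantization parameters: (s1, z1), (s2, z2) for the inputs,
      // (s, z) for the output.
      const float s1 = 0.5f, s2 = 0.25f, s = 1.0f;
      const int32_t z1 = 10, z2 = 20, z = 5;
      const int left_shift = 20;

      // Same construction as in Prepare().
      const float mul1 = s1 / s;
      const float mul2 = s2 / s;
      const int32_t mul1_fixed = static_cast<int32_t>(mul1 * (1 << left_shift));
      const int32_t mul2_fixed = static_cast<int32_t>(mul2 * (1 << left_shift));
      const int32_t combined_offset = static_cast<int32_t>(
          (z << left_shift) - (z1 << left_shift) * mul1 - (z2 << left_shift) * mul2);

      // One sample element per input tensor.
      const int32_t q1 = 130, q2 = 60;

      // Float reference: y = ((q1 - z1)*s1 + (q2 - z2)*s2)/s + z.
      const float y_float = ((q1 - z1) * s1 + (q2 - z2) * s2) / s + z;

      // Simplified fixed-point path: q1*mul1 + q2*mul2 + combined_offset, then
      // shift the (12.20) accumulator back down by left_shift.
      int32_t acc = combined_offset;
      acc += q1 * mul1_fixed;
      acc += q2 * mul2_fixed;
      const int32_t y_fixed = acc >> left_shift;

      std::printf("float: %f  fixed: %d\n", y_float, static_cast<int>(y_fixed));
      return 0;
    }

For these made-up values both paths produce 75, up to the rounding introduced by the 20-bit fixed-point representation.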
@@ -230,6 +258,9 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     tflite::ArithmeticParams op_params;
     op_params.left_shift = data->left_shift;
+    op_params.multiplier1 = data->multiplier1;
+    op_params.multiplier2 = data->multiplier2;
+    op_params.combined_offset = data->combined_offset;
     op_params.input1_offset = data->input1_offset;
     op_params.input1_multiplier = data->input1_multiplier;
     op_params.input1_shift = data->input1_shift;
@@ -1689,21 +1689,13 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
 #endif  // NEON
 
   for (; i < size; ++i) {
-    const int32 input1_val = params.input1_offset + input1_data[i];
-    const int32 input2_val = params.input2_offset + input2_data[i];
-    const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
-    const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
-    const int32 scaled_input1_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input1_val, params.input1_multiplier, params.input1_shift);
-    const int32 scaled_input2_val =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            shifted_input2_val, params.input2_multiplier, params.input2_shift);
-    const int32 raw_sum = scaled_input1_val + scaled_input2_val;
-    const int32 raw_output =
-        MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            raw_sum, params.output_multiplier, params.output_shift) +
-        params.output_offset;
+    int32 result = params.combined_offset;
+    result += (static_cast<int32>(input1_data[i]) * params.multiplier1);
+    result += (static_cast<int32>(input2_data[i]) * params.multiplier2);
+    // We perform simplified fixed-point arithmetic here, which requires that
+    // raw_output be obtained by shifting with params.left_shift instead of
+    // params.output_shift.
+    const int32 raw_output = result >> params.left_shift;
     const int32 clamped_output =
         std::min(params.quantized_activation_max,
                  std::max(params.quantized_activation_min, raw_output));
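The net effect of this hunk is that the scalar tail of AddElementwise no longer calls MultiplyByQuantizedMultiplierSmallerThanOneExp three times per element; each element now costs two 32-bit multiplies, two adds and one shift. A minimal standalone sketch of the new per-element computation follows (the struct below is a hypothetical stand-in for the handful of tflite::ArithmeticParams fields involved, not the real type):

    #include <algorithm>
    #include <cstdint>

    // Hypothetical stand-in for the ArithmeticParams fields used by the new path.
    struct SimplifiedAddParams {
      int left_shift;            // 20 in this commit
      int32_t multiplier1;       // s1/s in (12.20) fixed point
      int32_t multiplier2;       // s2/s in (12.20) fixed point
      int32_t combined_offset;   // (z - z1*s1/s - z2*s2/s) in (12.20) fixed point
      int32_t quantized_activation_min;
      int32_t quantized_activation_max;
    };

    inline uint8_t SimplifiedAddOneElement(const SimplifiedAddParams& params,
                                           uint8_t q1, uint8_t q2) {
      // y * 2^left_shift = q1*multiplier1 + q2*multiplier2 + combined_offset
      int32_t result = params.combined_offset;
      result += static_cast<int32_t>(q1) * params.multiplier1;
      result += static_cast<int32_t>(q2) * params.multiplier2;
      const int32_t raw_output = result >> params.left_shift;
      // Clamp to the fused-activation range, as the real kernel does.
      const int32_t clamped_output =
          std::min(params.quantized_activation_max,
                   std::max(params.quantized_activation_min, raw_output));
      return static_cast<uint8_t>(clamped_output);
    }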
@@ -765,6 +765,9 @@ struct ArithmeticParams {
   int input1_shift;
   int32 input2_multiplier;
   int input2_shift;
+  int32 combined_offset;
+  int32 multiplier1;
+  int32 multiplier2;
   // uint8, etc, activation params.
   int32 quantized_activation_min;
   int32 quantized_activation_max;
@@ -60,6 +60,9 @@ struct OpData {
   int32 input1_offset;
   int32 input2_offset;
   int32 output_offset;
+  int32 combined_offset;
+  int32 multiplier1;
+  int32 multiplier2;
 };
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -131,6 +134,33 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
   tflite::QuantizeMultiplierSmallerThanOneExp(real_output_multiplier,
                                               &op_params->output_multiplier,
                                               &op_params->output_shift);
+  // Simplify elementwise subtraction.
+  // This part of the code subtracts the entries of two quantized tensors as follows:
+  // x1 = (q1 - z1)s1
+  // x2 = (q2 - z2)s2
+  // y = (x1 - x2)/s + z
+  // s1, s2 and s are constant scales for the input and output tensors.
+  // z1, z2 and z are constant zero points.
+  // The above equation can be rewritten as
+  // y = s1.q1/s - s1.z1/s - s2.q2/s + s2.z2/s + z
+  // and, rearranging,
+  // y = q1.s1/s - q2.s2/s - s1.z1/s + s2.z2/s + z
+  // Setting mul1 = s1/s, mul2 = -s2/s and combined_offset = -s1.z1/s + s2.z2/s + z:
+  // y = q1.mul1 + q2.mul2 + combined_offset.
+  // These operations are performed with fixed-point (12.20) arithmetic.
+  const float mul1 = (input_1->params.scale / output->params.scale);
+  const float mul2 = (input_2->params.scale / output->params.scale);
+  const int32 mul1_fixed =
+      static_cast<int32>(mul1 * (1 << op_params->left_shift));
+  const int32 mul2_fixed =
+      static_cast<int32>(mul2 * (1 << op_params->left_shift));
+  const int32 combined_offset = static_cast<int32>(
+      (output->params.zero_point << op_params->left_shift) -
+      (input_1->params.zero_point << op_params->left_shift) * mul1 +
+      (input_2->params.zero_point << op_params->left_shift) * mul2);
+  op_params->combined_offset = combined_offset;
+  op_params->multiplier1 = mul1_fixed;
+  op_params->multiplier2 = -mul2_fixed;
   if (output->type == kTfLiteUInt8) {
     CalculateActivationRangeUint8(params->activation, output,
                                   &op_params->output_activation_min,
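The sign handling in this hunk is easy to misread: mul2_fixed is computed from the positive ratio s2/s, is used with a plus sign when building combined_offset, and is only negated when stored into multiplier2. A small standalone check (not part of the commit; all quantization parameters and sample values are made up) that this reproduces the float reference for one element:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const float s1 = 0.5f, s2 = 0.25f, s = 1.0f;  // made-up scales
      const int32_t z1 = 10, z2 = 20, z = 5;        // made-up zero points
      const int left_shift = 20;

      // Same construction as in Prepare8BitSubOp().
      const float mul1 = s1 / s;
      const float mul2 = s2 / s;
      const int32_t mul1_fixed = static_cast<int32_t>(mul1 * (1 << left_shift));
      const int32_t mul2_fixed = static_cast<int32_t>(mul2 * (1 << left_shift));
      const int32_t combined_offset = static_cast<int32_t>(
          (z << left_shift) - (z1 << left_shift) * mul1 + (z2 << left_shift) * mul2);
      const int32_t multiplier1 = mul1_fixed;
      const int32_t multiplier2 = -mul2_fixed;

      const int32_t q1 = 130, q2 = 60;  // one sample element per input
      // Float reference: y = ((q1 - z1)*s1 - (q2 - z2)*s2)/s + z.
      const float y_float = ((q1 - z1) * s1 - (q2 - z2) * s2) / s + z;
      // Fixed-point path, as in the rewritten elementwise loop.
      const int32_t y_fixed =
          (combined_offset + q1 * multiplier1 + q2 * multiplier2) >> left_shift;

      std::printf("float: %f  fixed: %d\n", y_float, static_cast<int>(y_fixed));
      return 0;
    }

For these made-up values both paths produce 55.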
@@ -287,6 +317,9 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   op_params.output_offset = data->output_offset;
   op_params.output_multiplier = data->output_multiplier;
   op_params.output_shift = data->output_shift;
+  op_params.multiplier1 = data->multiplier1;
+  op_params.multiplier2 = data->multiplier2;
+  op_params.combined_offset = data->combined_offset;
   SetActivationParams(data->output_activation_min, data->output_activation_max,
                       &op_params);