Addressed reviewer comments.
This commit is contained in:
parent
b94cb4732a
commit
924d0b72c5
@ -58,6 +58,11 @@ struct OpData {
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
|
||||
// This parameter is used to indicate whether
|
||||
// the parameter scale is a power of two.
|
||||
// It is used in 16-bit -> 16-bit quantization.
|
||||
bool pot_scale_16bit;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
@ -95,12 +100,36 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
// as well as, 16bit -> 16bit with general rescalings
|
||||
bool general_16bit = input1->type == kTfLiteInt16 &&
|
||||
input2->type == kTfLiteInt16 &&
|
||||
output->type == kTfLiteInt16;
|
||||
bool pot_scale_16bit = false;
|
||||
|
||||
bool input1_scale_is_pot = false;
|
||||
bool input2_scale_is_pot = false;
|
||||
bool output_scale_is_pot = false;
|
||||
|
||||
int input1_scale_log2_rounded;
|
||||
int input2_scale_log2_rounded;
|
||||
int output_scale_log2_rounded;
|
||||
|
||||
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
|
||||
output->type == kTfLiteInt16) {
|
||||
// Check whether each scale parameter is a power of two (POT).
|
||||
input1_scale_is_pot =
|
||||
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
||||
|
||||
input2_scale_is_pot =
|
||||
CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
|
||||
|
||||
output_scale_is_pot =
|
||||
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
||||
|
||||
pot_scale_16bit = input1_scale_log2_rounded && input2_scale_log2_rounded &&
|
||||
output_scale_log2_rounded;
|
||||
}
|
||||
|
||||
data->pot_scale_16bit = pot_scale_16bit;
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
||||
general_16bit) {
|
||||
pot_scale_16bit) {
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
// as well as, 16bit -> 16bit with general rescalings
|
||||
data->input1_offset = -input1->params.zero_point;
|
||||
@ -110,7 +139,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
// The shift is set to 15 for 16-bit and to 20 for 8-bit, respectively.
|
||||
// In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
|
||||
// therefore the addition will still fit in a 32 bit accumulator.
|
||||
data->left_shift = general_16bit ? 15 : 20;
|
||||
data->left_shift = pot_scale_16bit ? 15 : 20;
|
||||
const double twice_max_input_scale =
|
||||
2 * std::max(input1->params.scale, input2->params.scale);
|
||||
const double real_input1_multiplier =
|
||||
@ -146,19 +175,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
|
||||
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
|
||||
|
||||
int input1_scale_log2_rounded;
|
||||
bool input1_scale_is_pot =
|
||||
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
||||
TF_LITE_ENSURE(context, input1_scale_is_pot);
|
||||
|
||||
int input2_scale_log2_rounded;
|
||||
bool input2_scale_is_pot =
|
||||
CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
|
||||
TF_LITE_ENSURE(context, input2_scale_is_pot);
|
||||
|
||||
int output_scale_log2_rounded;
|
||||
bool output_scale_is_pot =
|
||||
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
||||
TF_LITE_ENSURE(context, output_scale_is_pot);
|
||||
|
||||
data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded;
|
||||
@ -233,12 +251,8 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output) {
|
||||
bool general_16bit = input1->type == kTfLiteInt16 &&
|
||||
input2->type == kTfLiteInt16 &&
|
||||
output->type == kTfLiteInt16;
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
||||
general_16bit) {
|
||||
data->pot_scale_16bit) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.left_shift = data->left_shift;
|
||||
op_params.input1_offset = data->input1_offset;
|
||||
@ -277,7 +291,10 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
if (need_broadcast) {
|
||||
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t);
|
||||
} else {
|
||||
TF_LITE_ADD(reference_ops, Add, int16_t);
|
||||
reference_ops::Add(
|
||||
op_params, GetTensorShape(input1), GetTensorData<int16_t>(input1),
|
||||
GetTensorShape(input2), GetTensorData<int16_t>(input2),
|
||||
GetTensorShape(output), GetTensorData<int16_t>(output), false);
|
||||
}
|
||||
} else {
|
||||
if (kernel_type == kReference) {
|
||||
@ -296,12 +313,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
}
|
||||
#undef TF_LITE_ADD
|
||||
} else if (output->type == kTfLiteInt16) {
|
||||
tflite::ArithmeticParams op_params;
|
||||
op_params.input1_shift = data->input1_shift;
|
||||
op_params.input2_shift = data->input2_shift;
|
||||
SetActivationParams(data->output_activation_min,
|
||||
data->output_activation_max, &op_params);
|
||||
#define TF_LITE_ADD(type, opname) \
|
||||
tflite::ArithmeticParams op_params; \
|
||||
op_params.input1_shift = data->input1_shift; \
|
||||
op_params.input2_shift = data->input2_shift; \
|
||||
SetActivationParams(data->output_activation_min, \
|
||||
data->output_activation_max, &op_params); \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<int16_t>(input1), GetTensorShape(input2), \
|
||||
GetTensorData<int16_t>(input2), GetTensorShape(output), \
|
||||
@ -309,7 +326,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
// The quantized version of Add doesn't support activations, so we
|
||||
// always use BroadcastAdd.
|
||||
if (kernel_type == kReference) {
|
||||
TF_LITE_ADD(reference_ops, AddLSTM);
|
||||
TF_LITE_ADD(reference_ops, Add);
|
||||
} else {
|
||||
TF_LITE_ADD(optimized_ops, Add);
|
||||
}
|
||||
|
@ -137,10 +137,13 @@ inline void Add(const ArithmeticParams& params,
|
||||
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
inline void Add(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int16* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16* input2_data,
|
||||
const RuntimeShape& output_shape, int16* output_data) {
|
||||
inline void AddGeneralParamScale(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape,
|
||||
const int16* input1_data,
|
||||
const RuntimeShape& input2_shape,
|
||||
const int16* input2_data,
|
||||
const RuntimeShape& output_shape,
|
||||
int16* output_data) {
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
const int flat_size =
|
||||
@ -155,10 +158,17 @@ inline void Add(const ArithmeticParams& params,
|
||||
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
|
||||
}
|
||||
|
||||
inline void AddLSTM(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int16* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16* input2_data,
|
||||
const RuntimeShape& output_shape, int16* output_data) {
|
||||
inline void Add(const ArithmeticParams& params,
|
||||
const RuntimeShape& input1_shape, const int16* input1_data,
|
||||
const RuntimeShape& input2_shape, const int16* input2_data,
|
||||
const RuntimeShape& output_shape, int16* output_data,
|
||||
bool pot_scale = true) {
|
||||
if (!pot_scale) {
|
||||
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
|
||||
input2_data, output_shape, output_data);
|
||||
return;
|
||||
}
|
||||
|
||||
TFLITE_DCHECK_LE(params.quantized_activation_min,
|
||||
params.quantized_activation_max);
|
||||
|
||||
|
@ -60,6 +60,11 @@ struct OpData {
|
||||
int32 input1_offset;
|
||||
int32 input2_offset;
|
||||
int32 output_offset;
|
||||
|
||||
// This parameter is used to indicate whether
|
||||
// the parameter scale is a power of two.
|
||||
// It is used in 16-bit -> 16-bit quantization.
|
||||
bool pot_scale_16bit;
|
||||
};
|
||||
|
||||
void* Init(TfLiteContext* context, const char* buffer, size_t length) {
|
||||
@ -147,10 +152,11 @@ TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context,
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
TfLiteStatus PrepareLSTMSubOp(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2, TfLiteTensor* output,
|
||||
TfLiteSubParams* params, OpData* data) {
|
||||
TfLiteStatus PrepareInt16SubOpPOT(TfLiteContext* context,
|
||||
const TfLiteTensor* input1,
|
||||
const TfLiteTensor* input2,
|
||||
TfLiteTensor* output, TfLiteSubParams* params,
|
||||
OpData* data) {
|
||||
// 16bit -> 16bit special quantized path, supporting only a rather
|
||||
// narrow case of quantization parameters: zero_points must all be 0
|
||||
// ("symmetric quantization") and scales must be power-of-two (which
|
||||
@ -219,19 +225,42 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
|
||||
|
||||
// 8bit -> 8bit general quantized path, with general rescalings
|
||||
// as well as, 16bit -> 16bit with general rescalings
|
||||
bool pot_scale_16bit = false;
|
||||
|
||||
bool general_16bit = output->type == kTfLiteInt16 &&
|
||||
input1->type == kTfLiteInt16 &&
|
||||
input2->type == kTfLiteInt16;
|
||||
bool input1_scale_is_pot = false;
|
||||
bool input2_scale_is_pot = false;
|
||||
bool output_scale_is_pot = false;
|
||||
|
||||
int input1_scale_log2_rounded;
|
||||
int input2_scale_log2_rounded;
|
||||
int output_scale_log2_rounded;
|
||||
|
||||
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
|
||||
output->type == kTfLiteInt16) {
|
||||
// Check whether each scale parameter is a power of two (POT).
|
||||
input1_scale_is_pot =
|
||||
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
|
||||
|
||||
input2_scale_is_pot =
|
||||
CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
|
||||
|
||||
output_scale_is_pot =
|
||||
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
|
||||
|
||||
pot_scale_16bit = input1_scale_log2_rounded && input2_scale_log2_rounded &&
|
||||
output_scale_log2_rounded;
|
||||
}
|
||||
|
||||
data->pot_scale_16bit = pot_scale_16bit;
|
||||
|
||||
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
|
||||
general_16bit) {
|
||||
pot_scale_16bit) {
|
||||
TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
|
||||
output, params, data, -1));
|
||||
} else if (output->type == kTfLiteInt16) {
|
||||
// LSTM-specific case where the scale parameters are powers of two (POT)
|
||||
TF_LITE_ENSURE_OK(context, PrepareLSTMSubOp(context, input1, input2, output,
|
||||
params, data));
|
||||
TF_LITE_ENSURE_OK(context, PrepareInt16SubOpPOT(context, input1, input2,
|
||||
output, params, data));
|
||||
}
|
||||
|
||||
return context->ResizeTensor(context, output, output_size);
|
||||
@ -306,11 +335,6 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
const bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
|
||||
GetTensorShape(input1), GetTensorShape(input2), &op_params);
|
||||
|
||||
// 16bit -> 16bit with general rescaling
|
||||
bool general_16bit = output->type == kTfLiteInt16 &&
|
||||
input1->type == kTfLiteInt16 &&
|
||||
input2->type == kTfLiteInt16;
|
||||
|
||||
#define TF_LITE_SUB(type, opname, data_type) \
|
||||
type::opname(op_params, GetTensorShape(input1), \
|
||||
GetTensorData<data_type>(input1), GetTensorShape(input2), \
|
||||
@ -324,11 +348,14 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
|
||||
} else {
|
||||
TF_LITE_SUB(reference_integer_ops, Add, int8_t);
|
||||
}
|
||||
} else if (general_16bit) {
|
||||
} else if (data->pot_scale_16bit) {
|
||||
if (need_broadcast) {
|
||||
TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
|
||||
} else {
|
||||
TF_LITE_SUB(reference_ops, Add, int16_t);
|
||||
reference_ops::Add(op_params, GetTensorShape(input1),
|
||||
GetTensorData<int16_t>(input1), GetTensorShape(input2),
|
||||
GetTensorData<int16_t>(input2), GetTensorShape(output),
|
||||
GetTensorData<int16_t>(output), false);
|
||||
}
|
||||
} else if (output->type == kTfLiteUInt8) {
|
||||
if (kernel_type == kReference) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user