Reland (Attempt #3) PR #35985: [TFLite int16] 16-bit version of ADD/SUB reference kernel operators

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/35985

This PR is one of the steps to extend 8-bit quantization to support symmetric 16-bit activations.

Each activation is of type int16 and symmetric around zero. The weight tensor precision remains at 8-bit signed values. The bias is set to int64 precision.

In this PR we introduce the implementation and tests for the ADD/SUB reference kernel functions.
The specification of these operators:

SUB
  Input 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Input 1:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Output 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0

ADD
  Input 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Input 1:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0
  Output 0:
    data_type  : int16
    range      : [-32768, 32767]
    granularity: per-tensor, zero_point=0

Copybara import of the project:

--
b94cb4732a by Elena Zhelezina <elena.zhelezina@arm.com>:

Added 16-bit version of ADD/SUB operators. Broadcasting is included.

--
924d0b72c5 by Elena Zhelezina <elena.zhelezina@arm.com>:

Addressed reviewer comments.

--
dd0d9e8f03 by Elena Zhelezina <elena.zhelezina@arm.com>:

Added versioning to ADD/SUB + some rework of the existing code.

--
abae3fd9a9 by Elena Zhelezina <elena.zhelezina@arm.com>:

Added versioning for ADD/SUB with new option in the schema.fbs
schema_generated.h is edited manually.

--
24f3f5593a by Elena Zhelezina <elena.zhelezina@arm.com>:

Fix for broken build.

--
d252fe175a by Elena Zhelezina <elena.zhelezina@arm.com>:

Fix for the failing internal test for NN delegates.

--
2223a5c380 by Elena Zhelezina <elena.zhelezina@arm.com>:

Fix for asan failures.

Change-Id: I2cf421ddda7f9e802202239136ab062bcd63b4aa

--
3c219a46ce by Elena Zhelezina <elena.zhelezina@arm.com>:

Added broadcast params to addsub structure.

Change-Id: I61d7d4a94087d052a782890799211031f6ed3015

--
9131a38c77 by Elena Zhelezina <elena.zhelezina@arm.com>:

Corrected defaults.

Change-Id: I9ea50c75014cc03ac91fdef0f5b4fe11395f7074
PiperOrigin-RevId: 324865496
This commit is contained in:
Jared Duke 2020-08-04 12:26:02 -07:00 committed by TensorFlower Gardener
parent beab9b83ea
commit 6be604aaac
16 changed files with 321 additions and 73 deletions

View File

@ -199,6 +199,8 @@ typedef struct {
typedef struct { typedef struct {
TfLiteFusedActivation activation; TfLiteFusedActivation activation;
// Parameter added for the version 4.
bool pot_scale_int16;
} TfLiteAddParams; } TfLiteAddParams;
typedef struct { typedef struct {
@ -220,6 +222,8 @@ typedef struct {
typedef struct { typedef struct {
TfLiteFusedActivation activation; TfLiteFusedActivation activation;
// Parameter added for the version 5.
bool pot_scale_int16;
} TfLiteSubParams; } TfLiteSubParams;
typedef struct { typedef struct {

View File

@ -896,6 +896,7 @@ TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter,
if (schema_params != nullptr) { if (schema_params != nullptr) {
params->activation = params->activation =
ConvertActivation(schema_params->fused_activation_function()); ConvertActivation(schema_params->fused_activation_function());
params->pot_scale_int16 = schema_params->pot_scale_int16();
} else { } else {
// TODO(b/157480169): We should either return kTfLiteError or fill in some // TODO(b/157480169): We should either return kTfLiteError or fill in some
// reasonable defaults in the params struct. We are not doing so until we // reasonable defaults in the params struct. We are not doing so until we
@ -1631,6 +1632,7 @@ TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter,
if (schema_params != nullptr) { if (schema_params != nullptr) {
params->activation = params->activation =
ConvertActivation(schema_params->fused_activation_function()); ConvertActivation(schema_params->fused_activation_function());
params->pot_scale_int16 = schema_params->pot_scale_int16();
} else { } else {
// TODO(b/157480169): We should either return kTfLiteError or fill in some // TODO(b/157480169): We should either return kTfLiteError or fill in some
// reasonable defaults in the params struct. We are not doing so until we // reasonable defaults in the params struct. We are not doing so until we

View File

@ -47,6 +47,7 @@ TEST(Writer, FloatModelTest) {
TfLiteAddParams* builtin_data = TfLiteAddParams* builtin_data =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams))); reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data->activation = kTfLiteActNone; builtin_data->activation = kTfLiteActNone;
builtin_data->pot_scale_int16 = false;
const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1);
interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0,
reinterpret_cast<void*>(builtin_data), reg); reinterpret_cast<void*>(builtin_data), reg);
@ -84,6 +85,7 @@ TEST(Writer, CustomInputOutputTest) {
TfLiteAddParams* builtin_data = TfLiteAddParams* builtin_data =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams))); reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data->activation = kTfLiteActNone; builtin_data->activation = kTfLiteActNone;
builtin_data->pot_scale_int16 = false;
const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1);
interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0,
reinterpret_cast<void*>(builtin_data), reg); reinterpret_cast<void*>(builtin_data), reg);
@ -131,6 +133,7 @@ TEST(Writer, CustomInputOutputErrorCasesTest) {
TfLiteAddParams* builtin_data = TfLiteAddParams* builtin_data =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams))); reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data->activation = kTfLiteActNone; builtin_data->activation = kTfLiteActNone;
builtin_data->pot_scale_int16 = false;
const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1);
interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0,
reinterpret_cast<void*>(builtin_data), reg); reinterpret_cast<void*>(builtin_data), reg);
@ -173,6 +176,7 @@ TEST(Writer, PerTensorQuantizedModelTest) {
TfLiteAddParams* builtin_data = TfLiteAddParams* builtin_data =
reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams))); reinterpret_cast<TfLiteAddParams*>(malloc(sizeof(TfLiteAddParams)));
builtin_data->activation = kTfLiteActNone; builtin_data->activation = kTfLiteActNone;
builtin_data->pot_scale_int16 = false;
const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1);
interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0,
reinterpret_cast<void*>(builtin_data), reg); reinterpret_cast<void*>(builtin_data), reg);

View File

@ -68,6 +68,11 @@ struct OpData {
int32 input1_offset; int32 input1_offset;
int32 input2_offset; int32 input2_offset;
int32 output_offset; int32 output_offset;
// This parameter is used to indicate whether
// parameter scale is power of two.
// It is used in 16-bit -> 16-bit quantization.
bool pot_scale_int16;
}; };
void* Init(TfLiteContext* context, const char* buffer, size_t length) { void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@ -103,12 +108,55 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output_size = TfLiteIntArrayCopy(input1->dims); output_size = TfLiteIntArrayCopy(input1->dims);
} }
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { // 8bit -> 8bit general quantized path, with general rescalings
// as well as, int16 -> int16 with general rescalings
bool pot_scale_int16 = true;
bool input1_scale_is_pot = false;
bool input2_scale_is_pot = false;
bool output_scale_is_pot = false;
int input1_scale_log2_rounded{0};
int input2_scale_log2_rounded{0};
int output_scale_log2_rounded{0};
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
output->type == kTfLiteInt16) {
// In case of 16-bit, there are two implementation:
// the scale parameter is a general number
// the scale parameter is POT and
// zero_point is zero for inputs/output.
pot_scale_int16 = (input1->params.zero_point == 0) &&
(input2->params.zero_point == 0) &&
(output->params.zero_point == 0);
input1_scale_is_pot =
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
input2_scale_is_pot =
CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
output_scale_is_pot =
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
pot_scale_int16 &=
input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
}
data->pot_scale_int16 = pot_scale_int16;
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
!pot_scale_int16) {
// 8bit -> 8bit general quantized path, with general rescalings // 8bit -> 8bit general quantized path, with general rescalings
// as well as, 16bit -> 16bit with general rescalings
data->input1_offset = -input1->params.zero_point; data->input1_offset = -input1->params.zero_point;
data->input2_offset = -input2->params.zero_point; data->input2_offset = -input2->params.zero_point;
data->output_offset = output->params.zero_point; data->output_offset = output->params.zero_point;
data->left_shift = 20;
// The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly.
// In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
// therefore the addition will still fit in a 32 bit accumulator.
data->left_shift = !pot_scale_int16 ? 15 : 20;
const double twice_max_input_scale = const double twice_max_input_scale =
2 * std::max(input1->params.scale, input2->params.scale); 2 * std::max(input1->params.scale, input2->params.scale);
const double real_input1_multiplier = const double real_input1_multiplier =
@ -144,19 +192,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0); TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
int input1_scale_log2_rounded;
bool input1_scale_is_pot =
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
TF_LITE_ENSURE(context, input1_scale_is_pot); TF_LITE_ENSURE(context, input1_scale_is_pot);
int input2_scale_log2_rounded;
bool input2_scale_is_pot =
CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
TF_LITE_ENSURE(context, input2_scale_is_pot); TF_LITE_ENSURE(context, input2_scale_is_pot);
int output_scale_log2_rounded;
bool output_scale_is_pot =
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
TF_LITE_ENSURE(context, output_scale_is_pot); TF_LITE_ENSURE(context, output_scale_is_pot);
data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded; data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded;
@ -231,7 +268,8 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
const TfLiteTensor* input1, const TfLiteTensor* input1,
const TfLiteTensor* input2, const TfLiteTensor* input2,
TfLiteTensor* output) { TfLiteTensor* output) {
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
!data->pot_scale_int16) {
tflite::ArithmeticParams op_params; tflite::ArithmeticParams op_params;
op_params.left_shift = data->left_shift; op_params.left_shift = data->left_shift;
op_params.input1_offset = data->input1_offset; op_params.input1_offset = data->input1_offset;
@ -266,6 +304,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
TF_LITE_ADD(optimized_integer_ops, Add, int8_t); TF_LITE_ADD(optimized_integer_ops, Add, int8_t);
} }
} }
} else if (output->type == kTfLiteInt16) {
if (need_broadcast) {
TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t);
} else {
reference_ops::Add(
op_params, GetTensorShape(input1), GetTensorData<int16_t>(input1),
GetTensorShape(input2), GetTensorData<int16_t>(input2),
GetTensorShape(output), GetTensorData<int16_t>(output), false);
}
} else { } else {
if (kernel_type == kReference) { if (kernel_type == kReference) {
if (need_broadcast) { if (need_broadcast) {
@ -283,12 +330,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
} }
#undef TF_LITE_ADD #undef TF_LITE_ADD
} else if (output->type == kTfLiteInt16) { } else if (output->type == kTfLiteInt16) {
tflite::ArithmeticParams op_params;
op_params.input1_shift = data->input1_shift;
op_params.input2_shift = data->input2_shift;
SetActivationParams(data->output_activation_min,
data->output_activation_max, &op_params);
#define TF_LITE_ADD(type, opname) \ #define TF_LITE_ADD(type, opname) \
tflite::ArithmeticParams op_params; \
op_params.input1_shift = data->input1_shift; \
op_params.input2_shift = data->input2_shift; \
SetActivationParams(data->output_activation_min, \
data->output_activation_max, &op_params); \
type::opname(op_params, GetTensorShape(input1), \ type::opname(op_params, GetTensorShape(input1), \
GetTensorData<int16_t>(input1), GetTensorShape(input2), \ GetTensorData<int16_t>(input1), GetTensorShape(input2), \
GetTensorData<int16_t>(input2), GetTensorShape(output), \ GetTensorData<int16_t>(input2), GetTensorShape(output), \

View File

@ -310,15 +310,18 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) {
const float kMin = -1.f; const float kMin = -1.f;
const float kMax = 32767.f / 32768.f; const float kMax = 32767.f / 32768.f;
float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); float kQuantizedTolerance = GetToleranceInt16(kMin, kMax);
std::vector<std::vector<float>> inputs1 = { std::vector<std::vector<float>> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7},
{0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0},
std::vector<std::vector<float>> inputs2 = { {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}};
{0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.3, 0.1, -0.1, 0.3},
std::vector<std::vector<float>> results = { {0.6, 0.4, 0.5, -0.8, 0.0, -1.0},
{0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; {0.6, 0.4, -0.8, 0.5, -0.9, 0.1}};
std::vector<std::vector<float>> results = {{0.7, 0.6, 0.6, 0.5, 0.8, 1.0},
{-0.2, 0.6, 0.9, -0.1, 0.1, -1.0},
{-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}};
for (size_t i = 0; i < inputs1.size(); ++i) { for (size_t i = 0; i < inputs1.size(); ++i) {
QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
{TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax},
{TensorType_INT16, {}, kMin, kMax}, {TensorType_INT16, {}, kMin, kMax},
ActivationFunctionType_NONE); ActivationFunctionType_NONE);
m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]); m.QuantizeAndPopulate<int16_t>(m.input1(), inputs1[i]);
@ -439,6 +442,10 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) {
QuantizedWithScalarBroadcast<TensorType_INT8, int8_t>(); QuantizedWithScalarBroadcast<TensorType_INT8, int8_t>();
} }
TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) {
QuantizedWithScalarBroadcast<TensorType_INT16, int16_t>();
}
template <enum TensorType tensor_type, typename integer_dtype> template <enum TensorType tensor_type, typename integer_dtype>
void QuantizedWithMixedBroadcast() { void QuantizedWithMixedBroadcast() {
float kQuantizedTolerance = GetTolerance(-3.f, 3.f); float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
@ -501,6 +508,10 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) {
QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>(); QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>();
} }
TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) {
QuantizedWithMixedBroadcast<TensorType_INT16, int16_t>();
}
template <enum TensorType tensor_type, typename integer_dtype> template <enum TensorType tensor_type, typename integer_dtype>
void QuantizedWithGenericBroadcast() { void QuantizedWithGenericBroadcast() {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0); float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
@ -527,5 +538,9 @@ TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) {
QuantizedWithGenericBroadcast<TensorType_INT8, int8_t>(); QuantizedWithGenericBroadcast<TensorType_INT8, int8_t>();
} }
TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt16) {
QuantizedWithGenericBroadcast<TensorType_INT16, int16_t>();
}
} // namespace } // namespace
} // namespace tflite } // namespace tflite

View File

@ -51,13 +51,18 @@ inline void Add(const ArithmeticParams& params,
// Element-wise add that can often be used for inner loop of broadcast add as // Element-wise add that can often be used for inner loop of broadcast add as
// well as the non-broadcast add. // well as the non-broadcast add.
// This function is used for 8-bit as well as for 16-bit, but the accumulator
// is 32-bit for both cases. The overflow does not happen due to the
// choice of the shift (20 or 15, accordingly - see add.cc for more comments).
template <typename T>
inline void AddElementwise(int size, const ArithmeticParams& params, inline void AddElementwise(int size, const ArithmeticParams& params,
const uint8_t* input1_data, const T* input1_data, const T* input2_data,
const uint8_t* input2_data, uint8_t* output_data) { T* output_data) {
TFLITE_DCHECK_GT(params.input1_offset, -256); TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_GT(params.input2_offset, -256); TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input1_offset, 256); TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits<T>::max());
TFLITE_DCHECK_LT(params.input2_offset, 256); TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits<T>::max());
for (int i = 0; i < size; ++i) { for (int i = 0; i < size; ++i) {
const int32_t input1_val = params.input1_offset + input1_data[i]; const int32_t input1_val = params.input1_offset + input1_data[i];
@ -78,7 +83,7 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
const int32_t clamped_output = const int32_t clamped_output =
std::min(params.quantized_activation_max, std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output)); std::max(params.quantized_activation_min, raw_output));
output_data[i] = static_cast<uint8_t>(clamped_output); output_data[i] = static_cast<T>(clamped_output);
} }
} }
@ -132,10 +137,38 @@ inline void Add(const ArithmeticParams& params,
AddElementwise(flat_size, params, input1_data, input2_data, output_data); AddElementwise(flat_size, params, input1_data, input2_data, output_data);
} }
inline void AddGeneralParamScale(const ArithmeticParams& params,
const RuntimeShape& input1_shape,
const int16_t* input1_data,
const RuntimeShape& input2_shape,
const int16_t* input2_data,
const RuntimeShape& output_shape,
int16_t* output_data) {
TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max);
const int flat_size =
MatchingElementsSize(input1_shape, input2_shape, output_shape);
int max_value = std::numeric_limits<int16_t>::max();
TFLITE_DCHECK_GT(params.input1_offset, -max_value);
TFLITE_DCHECK_GT(params.input2_offset, -max_value);
TFLITE_DCHECK_LT(params.input1_offset, max_value);
TFLITE_DCHECK_LT(params.input2_offset, max_value);
AddElementwise(flat_size, params, input1_data, input2_data, output_data);
}
inline void Add(const ArithmeticParams& params, inline void Add(const ArithmeticParams& params,
const RuntimeShape& input1_shape, const int16_t* input1_data, const RuntimeShape& input1_shape, const int16_t* input1_data,
const RuntimeShape& input2_shape, const int16_t* input2_data, const RuntimeShape& input2_shape, const int16_t* input2_data,
const RuntimeShape& output_shape, int16_t* output_data) { const RuntimeShape& output_shape, int16_t* output_data,
bool pot_scale = true) {
if (!pot_scale) {
AddGeneralParamScale(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
return;
}
TFLITE_DCHECK_LE(params.quantized_activation_min, TFLITE_DCHECK_LE(params.quantized_activation_min,
params.quantized_activation_max); params.quantized_activation_max);
@ -258,13 +291,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
} }
} }
inline void BroadcastAdd4DSlow(const ArithmeticParams& params, // This function is used for 8-bit as well as for 16-bit, but the accumulator
const RuntimeShape& input1_shape, // is 32-bit for both cases. The overflow does not happen due to the
const uint8_t* input1_data, // choice of the shift (20 or 15, accordingly - see add.cc for more comments).
const RuntimeShape& input2_shape, template <typename T>
const uint8_t* input2_data, inline void BroadcastAdd4DSlow(
const RuntimeShape& output_shape, const ArithmeticParams& params, const RuntimeShape& input1_shape,
uint8_t* output_data) { const T* input1_data, const RuntimeShape& input2_shape,
const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
NdArrayDesc<4> desc1; NdArrayDesc<4> desc1;
NdArrayDesc<4> desc2; NdArrayDesc<4> desc2;
NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1,
@ -314,7 +348,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params,
std::min(params.quantized_activation_max, std::min(params.quantized_activation_max,
std::max(params.quantized_activation_min, raw_output)); std::max(params.quantized_activation_min, raw_output));
output_data[Offset(extended_output_shape, b, y, x, c)] = output_data[Offset(extended_output_shape, b, y, x, c)] =
static_cast<uint8_t>(clamped_output); static_cast<T>(clamped_output);
} }
} }
} }

View File

@ -89,8 +89,8 @@ BuiltinOpResolver::BuiltinOpResolver() {
/* min_version = */ 1, /* min_version = */ 1,
/* max_version = */ 3); /* max_version = */ 3);
AddBuiltin(BuiltinOperator_ADD, Register_ADD(), AddBuiltin(BuiltinOperator_ADD, Register_ADD(),
/* min_version = */ 1, /* min_version */ 1,
/* max_version = */ 2); /* max_version */ 4);
AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(), AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(),
/* min_version = */ 1, /* min_version = */ 1,
/* max_version = */ 3); /* max_version = */ 3);
@ -143,7 +143,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
/* max_version */ 2); /* max_version */ 2);
AddBuiltin(BuiltinOperator_SUB, Register_SUB(), AddBuiltin(BuiltinOperator_SUB, Register_SUB(),
/* min_version = */ 1, /* min_version = */ 1,
/* max_version = */ 4); /* max_version = */ 5);
AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(),
/* min_version = */ 1, /* min_version = */ 1,
/* max_version = */ 4); /* max_version = */ 4);

View File

@ -71,6 +71,11 @@ struct OpData {
int32 input1_offset; int32 input1_offset;
int32 input2_offset; int32 input2_offset;
int32 output_offset; int32 output_offset;
// This parameter is used to indicate whether
// parameter scale is power of two.
// It is used in 16-bit -> 16-bit quantization.
bool pot_scale_int16;
}; };
void* Init(TfLiteContext* context, const char* buffer, size_t length) { void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@ -83,13 +88,14 @@ void Free(TfLiteContext* context, void* buffer) {
delete reinterpret_cast<OpData*>(buffer); delete reinterpret_cast<OpData*>(buffer);
} }
TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context,
const TfLiteTensor* input_1, const TfLiteTensor* input_1,
const TfLiteTensor* input_2, TfLiteTensor* output, const TfLiteTensor* input_2,
TfLiteSubParams* params, OpData* op_params, TfLiteTensor* output, TfLiteSubParams* params,
int op_sign) { OpData* op_params, int op_sign) {
TF_LITE_ENSURE(context, TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 ||
output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); output->type == kTfLiteInt8 ||
output->type == kTfLiteInt16);
const auto& input1_quantization_params = input_1->params; const auto& input1_quantization_params = input_1->params;
const auto& input2_quantization_params = input_2->params; const auto& input2_quantization_params = input_2->params;
const auto& output_quantization_params = output->params; const auto& output_quantization_params = output->params;
@ -98,6 +104,9 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
if (output->type == kTfLiteUInt8) { if (output->type == kTfLiteUInt8) {
integer_type_min = std::numeric_limits<uint8_t>::min(); integer_type_min = std::numeric_limits<uint8_t>::min();
integer_type_max = std::numeric_limits<uint8_t>::max(); integer_type_max = std::numeric_limits<uint8_t>::max();
} else if (output->type == kTfLiteInt16) {
integer_type_min = std::numeric_limits<int16_t>::min();
integer_type_max = std::numeric_limits<int16_t>::max();
} else { } else {
// output->type == kTfLiteInt8 // output->type == kTfLiteInt8
integer_type_min = std::numeric_limits<int8_t>::min(); integer_type_min = std::numeric_limits<int8_t>::min();
@ -120,7 +129,11 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
op_params->input1_offset = -input1_quantization_params.zero_point; op_params->input1_offset = -input1_quantization_params.zero_point;
op_params->input2_offset = -input2_quantization_params.zero_point; op_params->input2_offset = -input2_quantization_params.zero_point;
op_params->output_offset = output_quantization_params.zero_point; op_params->output_offset = output_quantization_params.zero_point;
op_params->left_shift = 20;
// The shift is set to 15 in case of 16-bit and 20 in case of 8-bit,
// accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 <<
// 31, therefore the addition will still fit in a 32 bit accumulator.
op_params->left_shift = output->type == kTfLiteInt16 ? 15 : 20;
const double twice_max_input_scale = const double twice_max_input_scale =
2 * std::max(input1_quantization_params.scale, 2 * std::max(input1_quantization_params.scale,
input2_quantization_params.scale); input2_quantization_params.scale);
@ -146,13 +159,15 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context,
TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
context, params->activation, output, &op_params->output_activation_min, context, params->activation, output, &op_params->output_activation_min,
&op_params->output_activation_max)); &op_params->output_activation_max));
return kTfLiteOk; return kTfLiteOk;
} }
TfLiteStatus PrepareInt16SubOp(TfLiteContext* context, TfLiteStatus PrepareInt16SubOpPOT(TfLiteContext* context,
const TfLiteTensor* input1, const TfLiteTensor* input1,
const TfLiteTensor* input2, TfLiteTensor* output, const TfLiteTensor* input2,
TfLiteSubParams* params, OpData* data) { TfLiteTensor* output, TfLiteSubParams* params,
OpData* data) {
// 16bit -> 16bit special quantized path, supporting only a rather // 16bit -> 16bit special quantized path, supporting only a rather
// narrow case of quantization parameters: zero_points must all be 0 // narrow case of quantization parameters: zero_points must all be 0
// ("symmetric quantization") and scales must be power-of-two (which // ("symmetric quantization") and scales must be power-of-two (which
@ -219,12 +234,51 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
output_size = TfLiteIntArrayCopy(input1->dims); output_size = TfLiteIntArrayCopy(input1->dims);
} }
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { // 8bit -> 8bit general quantized path, with general rescalings
TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output, // as well as, 16bit -> 16bit with general rescalings
params, data, -1)); bool pot_scale_int16 = true;
bool input1_scale_is_pot = false;
bool input2_scale_is_pot = false;
bool output_scale_is_pot = false;
int input1_scale_log2_rounded{0};
int input2_scale_log2_rounded{0};
int output_scale_log2_rounded{0};
if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
output->type == kTfLiteInt16) {
// In case of 16-bit, there are two implementation:
// the scale parameter is a general number
// the scale parameter is POT and
// zero_point is zero for inputs/output.
pot_scale_int16 = (input1->params.zero_point == 0) &&
(input2->params.zero_point == 0) &&
(output->params.zero_point == 0);
input1_scale_is_pot =
CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
input2_scale_is_pot =
CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
output_scale_is_pot =
CheckedLog2(output->params.scale, &output_scale_log2_rounded);
pot_scale_int16 &=
input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
}
data->pot_scale_int16 = pot_scale_int16;
if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
!pot_scale_int16) {
TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
output, params, data, -1));
} else if (output->type == kTfLiteInt16) { } else if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2, // LSTM-special case with scale parameter of POT
output, params, data)); TF_LITE_ENSURE_OK(context, PrepareInt16SubOpPOT(context, input1, input2,
output, params, data));
} }
return context->ResizeTensor(context, output, output_size); return context->ResizeTensor(context, output, output_size);
@ -332,6 +386,15 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
} else { } else {
TF_LITE_SUB(reference_integer_ops, Add, int8_t); TF_LITE_SUB(reference_integer_ops, Add, int8_t);
} }
} else if (!data->pot_scale_int16) {
if (need_broadcast) {
TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
} else {
reference_ops::Add(op_params, GetTensorShape(input1),
GetTensorData<int16_t>(input1), GetTensorShape(input2),
GetTensorData<int16_t>(input2), GetTensorShape(output),
GetTensorData<int16_t>(output), false);
}
} else if (output->type == kTfLiteUInt8) { } else if (output->type == kTfLiteUInt8) {
if (kernel_type == kReference) { if (kernel_type == kReference) {
if (need_broadcast) { if (need_broadcast) {

View File

@ -304,6 +304,10 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) {
QuantizedTestsNoActivation<TensorType_INT8, int8_t>(); QuantizedTestsNoActivation<TensorType_INT8, int8_t>();
} }
TEST(QuantizedSubOpModel, QuantizedTestsNoActivationGenericInt16) {
QuantizedTestsNoActivation<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype> template <TensorType tensor_type, typename integer_dtype>
void QuantizedTestsActivationRELU_N1_TO_1() { void QuantizedTestsActivationRELU_N1_TO_1() {
float kQuantizedTolerance = GetTolerance(-1.0, 1.0); float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
@ -365,6 +369,10 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) {
QuantizedVariousInputShapes<TensorType_INT8, int8_t>(); QuantizedVariousInputShapes<TensorType_INT8, int8_t>();
} }
TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) {
QuantizedVariousInputShapes<TensorType_INT16, int16_t>();
}
template <TensorType tensor_type, typename integer_dtype> template <TensorType tensor_type, typename integer_dtype>
void QuantizedWithBroadcast() { void QuantizedWithBroadcast() {
float kQuantizedTolerance = GetTolerance(-3.0, 3.0); float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
@ -393,6 +401,10 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) {
QuantizedWithBroadcast<TensorType_INT8, int8_t>(); QuantizedWithBroadcast<TensorType_INT8, int8_t>();
} }
// Runs the shared QuantizedWithBroadcast template with int16 tensors
// (TensorType_INT16 / int16_t), alongside the existing int8 instantiation.
TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) {
QuantizedWithBroadcast<TensorType_INT16, int16_t>();
}
TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) { TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) {
const float kMin = -1.f; const float kMin = -1.f;
const float kMax = const float kMax =

View File

@ -583,6 +583,8 @@ table ConcatenationOptions {
table AddOptions { table AddOptions {
fused_activation_function:ActivationFunctionType; fused_activation_function:ActivationFunctionType;
// Parameters supported by version 3.
pot_scale_int16:bool = true;
} }
table MulOptions { table MulOptions {
@ -704,6 +706,8 @@ table DepthToSpaceOptions {
table SubOptions { table SubOptions {
fused_activation_function:ActivationFunctionType; fused_activation_function:ActivationFunctionType;
// Parameters supported by version 5
pot_scale_int16:bool = true;
} }
table DivOptions { table DivOptions {

View File

@ -4742,22 +4742,29 @@ flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(flatbuffers
struct AddOptionsT : public flatbuffers::NativeTable { struct AddOptionsT : public flatbuffers::NativeTable {
typedef AddOptions TableType; typedef AddOptions TableType;
bool pot_scale_int16;
tflite::ActivationFunctionType fused_activation_function; tflite::ActivationFunctionType fused_activation_function;
AddOptionsT() AddOptionsT()
: fused_activation_function(tflite::ActivationFunctionType_NONE) { : pot_scale_int16(true),
fused_activation_function(tflite::ActivationFunctionType_NONE) {
} }
}; };
struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
typedef AddOptionsT NativeTableType; typedef AddOptionsT NativeTableType;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_FUSED_ACTIVATION_FUNCTION = 4 VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_POT_SCALE_INT16 = 6
}; };
// Returns the pot_scale_int16 option stored in this AddOptions table.
//
// The fallback passed to GetField is the value reported when the field is
// absent from the buffer. It must be 1 (true) to agree with the schema
// declaration `pot_scale_int16:bool = true` and with the AddOptionsT
// constructor; a fallback of 0 would make buffers written without this field
// read back as false.
// NOTE(review): the builder side for this field is not visible here; its
// AddElement default must also be 1 so that explicitly-false values are
// still serialized — confirm.
bool pot_scale_int16() const {
  return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0;
}
tflite::ActivationFunctionType fused_activation_function() const { tflite::ActivationFunctionType fused_activation_function() const {
return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
} }
bool Verify(flatbuffers::Verifier &verifier) const { bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) && return VerifyTableStart(verifier) &&
VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) &&
VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
verifier.EndTable(); verifier.EndTable();
} }
@ -5907,22 +5914,29 @@ flatbuffers::Offset<DepthToSpaceOptions> CreateDepthToSpaceOptions(flatbuffers::
struct SubOptionsT : public flatbuffers::NativeTable { struct SubOptionsT : public flatbuffers::NativeTable {
typedef SubOptions TableType; typedef SubOptions TableType;
bool pot_scale_int16;
tflite::ActivationFunctionType fused_activation_function; tflite::ActivationFunctionType fused_activation_function;
SubOptionsT() SubOptionsT()
: fused_activation_function(tflite::ActivationFunctionType_NONE) { : pot_scale_int16(true),
fused_activation_function(tflite::ActivationFunctionType_NONE) {
} }
}; };
struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
typedef SubOptionsT NativeTableType; typedef SubOptionsT NativeTableType;
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
VT_FUSED_ACTIVATION_FUNCTION = 4 VT_FUSED_ACTIVATION_FUNCTION = 4,
VT_POT_SCALE_INT16 = 6
}; };
// Returns the pot_scale_int16 option stored in this SubOptions table.
//
// The fallback passed to GetField is the value reported when the field is
// absent from the buffer. It must be 1 (true) to agree with the schema
// declaration `pot_scale_int16:bool = true` and with the SubOptionsT
// constructor; a fallback of 0 would make buffers written without this field
// read back as false.
// NOTE(review): the builder side for this field is not visible here; its
// AddElement default must also be 1 so that explicitly-false values are
// still serialized — confirm.
bool pot_scale_int16() const {
  return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0;
}
tflite::ActivationFunctionType fused_activation_function() const { tflite::ActivationFunctionType fused_activation_function() const {
return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0)); return static_cast<tflite::ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
} }
bool Verify(flatbuffers::Verifier &verifier) const { bool Verify(flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) && return VerifyTableStart(verifier) &&
VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) &&
VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
verifier.EndTable(); verifier.EndTable();
} }

View File

@ -53,12 +53,15 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) {
{{OperatorType::kDepthwiseConv, 5}, kPendingReleaseOpVersion}, {{OperatorType::kDepthwiseConv, 5}, kPendingReleaseOpVersion},
{{OperatorType::kAdd, 1}, "1.5.0"}, {{OperatorType::kAdd, 1}, "1.5.0"},
{{OperatorType::kAdd, 2}, "1.14.0"}, {{OperatorType::kAdd, 2}, "1.14.0"},
{{OperatorType::kAdd, 3}, kPendingReleaseOpVersion},
{{OperatorType::kAddN, 1}, "1.14.0"}, {{OperatorType::kAddN, 1}, "1.14.0"},
{{OperatorType::kSpaceToBatchND, 1}, "1.6.0"}, {{OperatorType::kSpaceToBatchND, 1}, "1.6.0"},
{{OperatorType::kSpaceToBatchND, 2}, "1.14.0"}, {{OperatorType::kSpaceToBatchND, 2}, "1.14.0"},
{{OperatorType::kSub, 1}, "1.6.0"}, {{OperatorType::kSub, 1}, "1.6.0"},
{{OperatorType::kSub, 2}, "1.14.0"}, {{OperatorType::kSub, 2}, "1.14.0"},
{{OperatorType::kSub, 3}, "1.15.0"},
{{OperatorType::kSub, 4}, kPendingReleaseOpVersion}, {{OperatorType::kSub, 4}, kPendingReleaseOpVersion},
{{OperatorType::kSub, 5}, kPendingReleaseOpVersion},
{{OperatorType::kDiv, 1}, "1.6.0"}, {{OperatorType::kDiv, 1}, "1.6.0"},
{{OperatorType::kBatchToSpaceND, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 1}, "1.6.0"},
{{OperatorType::kBatchToSpaceND, 2}, "1.14.0"}, {{OperatorType::kBatchToSpaceND, 2}, "1.14.0"},

View File

@ -276,10 +276,10 @@ class Sub : public BuiltinOperator<SubOperator, ::tflite::SubOptions,
::tflite::OpSignature op_sig = ::tflite::OpSignature op_sig =
GetVersioningOpSig(builtin_op(), op_signature); GetVersioningOpSig(builtin_op(), op_signature);
if (input1_array.has_shape() && input2_array.has_shape()) { if (input1_array.has_shape() && input2_array.has_shape()) {
op_sig.options.broadcast.num_dims = op_sig.options.addsub.num_dims =
std::max(input1_array.shape().dimensions_count(), std::max(input1_array.shape().dimensions_count(),
input2_array.shape().dimensions_count()); input2_array.shape().dimensions_count());
op_sig.options.broadcast.need_broadcast = op_sig.options.addsub.need_broadcast =
(input1_array.shape() != input2_array.shape()); (input1_array.shape() != input2_array.shape());
} }
return ::tflite::GetBuiltinOperatorVersion(op_sig); return ::tflite::GetBuiltinOperatorVersion(op_sig);

View File

@ -450,13 +450,31 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
} }
return 1; return 1;
case BuiltinOperator_ADD:
  // Version 3: int16 input and int16 output with pot_scale_int16 cleared.
  if (op_sig.input_types.at(0) == TensorType_INT16 &&
      op_sig.output_types.at(0) == TensorType_INT16 &&
      !op_sig.options.addsub.pot_scale_int16) {
    return 3;
  }
  // Version 2 for int8 input, version 1 for everything else.
  return op_sig.input_types.at(0) == TensorType_INT8 ? 2 : 1;
case BuiltinOperator_SUB: case BuiltinOperator_SUB:
if (op_sig.input_types.at(0) == TensorType_INT16 &&
op_sig.output_types.at(0) == TensorType_INT16) {
if (!op_sig.options.addsub.pot_scale_int16) {
return 5;
}
}
if (!op_sig.input_types.empty() && if (!op_sig.input_types.empty() &&
op_sig.input_types.at(0) == TensorType_INT64) { op_sig.input_types.at(0) == TensorType_INT64) {
return 4; return 4;
} }
if (op_sig.options.broadcast.need_broadcast && if (op_sig.options.addsub.need_broadcast &&
op_sig.options.broadcast.num_dims > 4) { op_sig.options.addsub.num_dims > 4) {
return 3; return 3;
} }
if (op_sig.input_types.at(0) == TensorType_INT8) { if (op_sig.input_types.at(0) == TensorType_INT8) {
@ -542,7 +560,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
} }
} }
return 1; return 1;
case BuiltinOperator_ADD:
case BuiltinOperator_SPACE_TO_DEPTH: case BuiltinOperator_SPACE_TO_DEPTH:
case BuiltinOperator_SPLIT_V: case BuiltinOperator_SPLIT_V:
case BuiltinOperator_SUM: case BuiltinOperator_SUM:
@ -669,6 +687,26 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op,
} }
} break; } break;
case BuiltinOperator_ADD: {
  // pot_scale_int16 is taken from the operator's AddOptions when present and
  // falls back to true when the operator has no AddOptions table.
  const auto* add_opts = op->builtin_options_as_AddOptions();
  op_sig.options.addsub.pot_scale_int16 =
      (add_opts == nullptr) || add_opts->pot_scale_int16();
} break;
case BuiltinOperator_SUB: {
  const auto* sub_opts = op->builtin_options_as_SubOptions();

  // Broadcasting is needed whenever the two inputs differ in shape.
  const bool same_shapes = HaveSameShapes(subgraph, op, 0, 1);
  op_sig.options.addsub.need_broadcast = !same_shapes;

  // Record the larger of the two inputs' dimension counts.
  op_sig.options.addsub.num_dims =
      std::max(GetNumDims(subgraph, op, 0), GetNumDims(subgraph, op, 1));

  // pot_scale_int16 comes from SubOptions when present, otherwise true.
  op_sig.options.addsub.pot_scale_int16 =
      (sub_opts == nullptr) || sub_opts->pot_scale_int16();
} break;
case BuiltinOperator_LSTM: { case BuiltinOperator_LSTM: {
auto lstm_option = op->builtin_options_as_LSTMOptions(); auto lstm_option = op->builtin_options_as_LSTMOptions();
if (lstm_option) { if (lstm_option) {
@ -714,7 +752,7 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op,
case BuiltinOperator_TRANSPOSE: { case BuiltinOperator_TRANSPOSE: {
op_sig.options.single_input_op.num_dims = GetNumDims(subgraph, op, 0); op_sig.options.single_input_op.num_dims = GetNumDims(subgraph, op, 0);
} break; } break;
case BuiltinOperator_SUB:
case BuiltinOperator_DIV: case BuiltinOperator_DIV:
case BuiltinOperator_MAXIMUM: case BuiltinOperator_MAXIMUM:
case BuiltinOperator_MINIMUM: { case BuiltinOperator_MINIMUM: {

View File

@ -63,6 +63,11 @@ typedef struct {
int32_t num_dims; int32_t num_dims;
bool need_broadcast; bool need_broadcast;
} broadcast; } broadcast;
// Signature options consulted when versioning ADD and SUB.
struct {
// Value of pot_scale_int16 from the operator's AddOptions/SubOptions; the
// signature builder sets it to true when the operator has no such options.
bool pot_scale_int16;
// Larger of the two inputs' dimension counts (populated for SUB).
int32_t num_dims;
// True when the two inputs do not have the same shape (populated for SUB).
bool need_broadcast;
} addsub;
struct { struct {
bool is_per_channel_quantized; bool is_per_channel_quantized;
} conv_2d; } conv_2d;

View File

@ -72,6 +72,8 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
{{BuiltinOperator_DEPTHWISE_CONV_2D, 6}, "2.3.0"}, {{BuiltinOperator_DEPTHWISE_CONV_2D, 6}, "2.3.0"},
{{BuiltinOperator_ADD, 1}, "1.5.0"}, {{BuiltinOperator_ADD, 1}, "1.5.0"},
{{BuiltinOperator_ADD, 2}, "1.14.0"}, {{BuiltinOperator_ADD, 2}, "1.14.0"},
{{BuiltinOperator_ADD, 3}, kPendingReleaseVersion},
{{BuiltinOperator_ADD, 4}, kPendingReleaseVersion},
{{BuiltinOperator_ADD_N, 1}, "1.14.0"}, {{BuiltinOperator_ADD_N, 1}, "1.14.0"},
{{BuiltinOperator_SPACE_TO_BATCH_ND, 1}, "1.6.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 1}, "1.6.0"},
{{BuiltinOperator_SPACE_TO_BATCH_ND, 2}, "1.14.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 2}, "1.14.0"},
@ -80,6 +82,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
{{BuiltinOperator_SUB, 2}, "1.14.0"}, {{BuiltinOperator_SUB, 2}, "1.14.0"},
{{BuiltinOperator_SUB, 3}, "2.3.0"}, {{BuiltinOperator_SUB, 3}, "2.3.0"},
{{BuiltinOperator_SUB, 4}, kPendingReleaseVersion}, {{BuiltinOperator_SUB, 4}, kPendingReleaseVersion},
{{BuiltinOperator_SUB, 5}, kPendingReleaseVersion},
{{BuiltinOperator_DENSIFY, 1}, "2.2.0"}, {{BuiltinOperator_DENSIFY, 1}, "2.2.0"},
{{BuiltinOperator_DIV, 1}, "1.6.0"}, {{BuiltinOperator_DIV, 1}, "1.6.0"},
{{BuiltinOperator_DIV, 2}, "2.3.0"}, {{BuiltinOperator_DIV, 2}, "2.3.0"},