From 01da0c850b40fe059cddcd5e72a8e3e016571c43 Mon Sep 17 00:00:00 2001
From: Elena Zhelezina
Date: Wed, 12 Aug 2020 11:57:04 +0100
Subject: [PATCH] 16x8 reference operators: ADD/SUB - inference always using
 general case + tests for versioning.

Change-Id: Ib24a2c2b837eaeb4868afdf1445aa332965a41af
---
 tensorflow/lite/kernels/add.cc                | 44 +++++++++++--------
 tensorflow/lite/kernels/register.cc           |  2 +-
 tensorflow/lite/kernels/sub.cc                | 43 ++++++++++--------
 tensorflow/lite/schema/schema.fbs             |  6 +--
 .../lite/tools/optimize/operator_property.cc  |  1 +
 .../lite/tools/optimize/quantize_model.cc     | 40 ++++++++++++++++-
 .../tools/optimize/quantize_model_test.cc     | 34 +++++++++++----
 .../lite/tools/versioning/op_version_test.cc  | 25 +++++++++++
 8 files changed, 143 insertions(+), 52 deletions(-)

diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc
index 7692ae9e54b..06992f7cafe 100644
--- a/tensorflow/lite/kernels/add.cc
+++ b/tensorflow/lite/kernels/add.cc
@@ -110,7 +110,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // 8bit -> 8bit general quantized path, with general rescalings
   // as well as, int16 -> int16 with general rescalings
-  bool pot_scale_int16 = true;
+
+  // There are two implementations of the ADD operator in the case of
+  // 16-bit inputs/outputs, depending on whether the scale parameter is
+  // a power of 2 or not. Currently only the implementation for the
+  // general case is used, but we need to keep the other implementation
+  // for models that use older versions of this operator.
+  bool general_scale_int16 = false;
 
   bool input1_scale_is_pot = false;
   bool input2_scale_is_pot = false;
@@ -122,31 +128,31 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
       output->type == kTfLiteInt16) {
-    // In case of 16-bit, there are two implementation:
-    // the scale parameter is a general number
-    // the scale parameter is POT and
-    // zero_point is zero for inputs/output.
-    pot_scale_int16 = (input1->params.zero_point == 0) &&
-                      (input2->params.zero_point == 0) &&
-                      (output->params.zero_point == 0);
+    general_scale_int16 = !params || !params->pot_scale_int16;
 
-    input1_scale_is_pot =
-        CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
+    if (!general_scale_int16) {
+      // Do preparation in the case when the scale parameter is a power of 2.
 
-    input2_scale_is_pot =
-        CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
+      input1_scale_is_pot =
+          CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
 
-    output_scale_is_pot =
-        CheckedLog2(output->params.scale, &output_scale_log2_rounded);
+      input2_scale_is_pot =
+          CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
 
-    pot_scale_int16 &=
-        input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
+      output_scale_is_pot =
+          CheckedLog2(output->params.scale, &output_scale_log2_rounded);
+
+      general_scale_int16 =
+          !input1_scale_is_pot || !input2_scale_is_pot ||
+          !output_scale_is_pot || input1->params.zero_point != 0 ||
+          input2->params.zero_point != 0 || output->params.zero_point != 0;
+    }
   }
 
-  data->pot_scale_int16 = pot_scale_int16;
+  data->pot_scale_int16 = !general_scale_int16;
 
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
-      !pot_scale_int16) {
+      general_scale_int16) {
     // 8bit -> 8bit general quantized path, with general rescalings
     // as well as, 16bit -> 16bit with general rescalings
     data->input1_offset = -input1->params.zero_point;
@@ -156,7 +162,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly.
     // In case of 16-bit we have 65535 << 15 which is less than 1 << 31,
     // therefore the addition will still fit in a 32 bit accumulator.
-    data->left_shift = !pot_scale_int16 ? 15 : 20;
+    data->left_shift = general_scale_int16 ? 15 : 20;
     const double twice_max_input_scale =
         2 * std::max(input1->params.scale, input2->params.scale);
     const double real_input1_multiplier =
diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc
index 1d1db9e0403..2e51e752d3f 100644
--- a/tensorflow/lite/kernels/register.cc
+++ b/tensorflow/lite/kernels/register.cc
@@ -90,7 +90,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
              /* max_version = */ 3);
   AddBuiltin(BuiltinOperator_ADD, Register_ADD(),
              /* min_version */ 1,
-             /* max_version */ 4);
+             /* max_version */ 3);
   AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(),
              /* min_version = */ 1,
              /* max_version = */ 3);
diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc
index f93ebecd46d..255a3666f3a 100644
--- a/tensorflow/lite/kernels/sub.cc
+++ b/tensorflow/lite/kernels/sub.cc
@@ -236,7 +236,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // 8bit -> 8bit general quantized path, with general rescalings
   // as well as, 16bit -> 16bit with general rescalings
-  bool pot_scale_int16 = true;
+
+  // There are two implementations of the SUB operator in the case of
+  // 16-bit inputs, depending on whether the scale parameter is
+  // a power of 2 or not. Currently only the implementation for the
+  // general case is used, but we need to keep the other implementation
+  // for models that use older versions of this operator.
+  bool general_scale_int16 = false;
 
   bool input1_scale_is_pot = false;
   bool input2_scale_is_pot = false;
@@ -248,31 +254,30 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 &&
       output->type == kTfLiteInt16) {
-    // In case of 16-bit, there are two implementation:
-    // the scale parameter is a general number
-    // the scale parameter is POT and
-    // zero_point is zero for inputs/output.
-    pot_scale_int16 = (input1->params.zero_point == 0) &&
-                      (input2->params.zero_point == 0) &&
-                      (output->params.zero_point == 0);
+    general_scale_int16 = !params || !params->pot_scale_int16;
 
-    input1_scale_is_pot =
-        CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
+    if (!general_scale_int16) {
+      // Do preparation in the case when the scale parameter is a power of 2.
+      input1_scale_is_pot =
+          CheckedLog2(input1->params.scale, &input1_scale_log2_rounded);
 
-    input2_scale_is_pot =
-        CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
+      input2_scale_is_pot =
+          CheckedLog2(input2->params.scale, &input2_scale_log2_rounded);
 
-    output_scale_is_pot =
-        CheckedLog2(output->params.scale, &output_scale_log2_rounded);
+      output_scale_is_pot =
+          CheckedLog2(output->params.scale, &output_scale_log2_rounded);
 
-    pot_scale_int16 &=
-        input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot;
+      general_scale_int16 =
+          !input1_scale_is_pot || !input2_scale_is_pot ||
+          !output_scale_is_pot || input1->params.zero_point != 0 ||
+          input2->params.zero_point != 0 || output->params.zero_point != 0;
+    }
   }
 
-  data->pot_scale_int16 = pot_scale_int16;
+  data->pot_scale_int16 = !general_scale_int16;
 
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 ||
-      !pot_scale_int16) {
+      general_scale_int16) {
     TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2,
                                                    output, params, data, -1));
   } else if (output->type == kTfLiteInt16) {
@@ -388,7 +393,7 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
     }
   } else if (!data->pot_scale_int16) {
     if (need_broadcast) {
-      TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t);
+      TF_LITE_SUB(reference_ops, BroadcastSubSlow, int16_t);
     } else {
       reference_ops::Add(op_params, GetTensorShape(input1),
                          GetTensorData<int16_t>(input1), GetTensorShape(input2),
diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs
index baeb49f7b7a..5fbd2fafedb 100644
--- a/tensorflow/lite/schema/schema.fbs
+++ b/tensorflow/lite/schema/schema.fbs
@@ -583,8 +583,8 @@ table ConcatenationOptions {
 
 table AddOptions {
   fused_activation_function:ActivationFunctionType;
-  // Parameters supported by version 4.
-  pot_scale_int16:bool = true;
+  // Parameters supported by version 3.
+  pot_scale_int16:bool = false;
 }
 
 table MulOptions {
@@ -707,7 +707,7 @@ table DepthToSpaceOptions {
 table SubOptions {
   fused_activation_function:ActivationFunctionType;
   // Parameters supported by version 5
-  pot_scale_int16:bool = true;
+  pot_scale_int16:bool = false;
 }
 
 table DivOptions {
diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc
index 5ab48d570f5..e15318bda1a 100644
--- a/tensorflow/lite/tools/optimize/operator_property.cc
+++ b/tensorflow/lite/tools/optimize/operator_property.cc
@@ -913,6 +913,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}, {1, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.restrict_same_input_output_scale = true;
       break;
     case BuiltinOperator_SUM:
       property.inputs = {{0, {}}};
diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc
index ca9a51abefe..f0766125223 100644
--- a/tensorflow/lite/tools/optimize/quantize_model.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model.cc
@@ -467,6 +467,44 @@ TfLiteStatus ApplyConstraints(
   return kTfLiteOk;
 }
 
+// In the case of int16 activations, there are two implementations of the
+// kernels for the ADD/SUB operators. We set the builtin option
+// pot_scale_int16 during quantization so that from then on only the
+// general-case implementation is used.
+void SetOperatorPropertyADDSUBOperator(ModelT* model,
+                                       const TensorType& activations_type) {
+  if (activations_type != TensorType_INT16) {
+    // This is needed only in the case of int16 activations.
+    return;
+  }
+
+  for (int subgraph_idx = 0, end = model->subgraphs.size(); subgraph_idx < end;
+       subgraph_idx++) {
+    SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
+    // Iterate backward to avoid invalidating the operator indices.
+    for (int op_idx = subgraph->operators.size() - 1; op_idx >= 0; op_idx--) {
+      OperatorT* op = subgraph->operators[op_idx].get();
+      OperatorCodeT* op_code = model->operator_codes[op->opcode_index].get();
+      if (op_code && op_code->builtin_code == BuiltinOperator_ADD) {
+        {
+          auto* options = op->builtin_options.AsAddOptions();
+          if (options) {
+            options->pot_scale_int16 = false;
+          }
+        }
+      }
+      if (op_code && op_code->builtin_code == BuiltinOperator_SUB) {
+        {
+          auto* options = op->builtin_options.AsSubOptions();
+          if (options) {
+            options->pot_scale_int16 = false;
+          }
+        }
+      }
+    }
+  }
+}
+
 std::vector<std::pair<int, operator_property::TensorProperty>> GetInputs(
     const OperatorT* op, operator_property::OperatorProperty property) {
   std::vector<std::pair<int, operator_property::TensorProperty>> inputs;
@@ -1347,7 +1385,7 @@ TfLiteStatus QuantizeModel(flatbuffers::FlatBufferBuilder* builder,
   utils::SetOperatorCodeVersion(model);
   TF_LITE_ENSURE_STATUS(SetInputAndOutputTypes(
       model, input_type, output_type, activations_type, error_reporter));
-
+  SetOperatorPropertyADDSUBOperator(model, activations_type);
   flatbuffers::Offset<Model> output_model_location =
       Model::Pack(*builder, model);
   FinishModelBuffer(*builder, output_model_location);
diff --git a/tensorflow/lite/tools/optimize/quantize_model_test.cc b/tensorflow/lite/tools/optimize/quantize_model_test.cc
index 0bf6bb8b5a9..4f1de3ffe61 100644
--- a/tensorflow/lite/tools/optimize/quantize_model_test.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model_test.cc
@@ -998,19 +998,26 @@ TEST_F(QuantizeMultiInputAddWithReshapeTest, VerifyAddQuantization) {
   EXPECT_EQ(model_.operator_codes[1]->version, 1);
 }
 
-class QuantizeConstInputTest : public QuantizeModelTest {
+class QuantizeConstInputTest : public QuantizeModelTest,
+                               public testing::WithParamInterface<TensorType> {
  protected:
   QuantizeConstInputTest() {
+    tensor_type_ = GetParam();
     input_model_ = ReadModel(internal::kConstInputAddModel);
     readonly_model_ = input_model_->GetModel();
     readonly_model_->UnPackTo(&model_);
   }
-};
-
-TEST_F(QuantizeConstInputTest, VerifyConstOpInput) {
-  auto status = QuantizeModelAllOperators(&builder_, &model_, TensorType_INT8,
-                                          TensorType_INT8, false,
-                                          TensorType_INT8, &error_reporter_);
+
+  TensorType tensor_type_;
+};
+INSTANTIATE_TEST_SUITE_P(QuantizeConstInputTestInst, QuantizeConstInputTest,
+                         testing::ValuesIn({TensorType_INT8,
+                                            TensorType_INT16}));
+
+TEST_P(QuantizeConstInputTest, VerifyConstOpInput) {
+  auto status =
+      QuantizeModelAllOperators(&builder_, &model_, tensor_type_, tensor_type_,
+                                false, tensor_type_, &error_reporter_);
   ASSERT_EQ(kTfLiteOk, status);
 
   // Verify ConstOp is quantized.
@@ -1030,17 +1037,26 @@ TEST_F(QuantizeConstInputTest, VerifyConstOpInput) {
   for (size_t input_idx = 0; input_idx < 2; ++input_idx) {
     EXPECT_EQ(subgraph->tensors[op->inputs[input_idx]].get()->type,
-              TensorType_INT8);
+              tensor_type_);
   }
 
-  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
+  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, tensor_type_);
 
   // check op and versioning.
   EXPECT_EQ(model_.operator_codes.size(), 1);
   EXPECT_EQ(model_.operator_codes[0]->builtin_code, BuiltinOperator_ADD);
   EXPECT_EQ(model_.operator_codes[0]->version, 2);
-}
+  // Check that in the case of int16 activations the pot_scale_int16
+  // parameter is set to false.
+  if (tensor_type_ == TensorType_INT16) {
+    EXPECT_EQ(subgraph->operators[0]
+                  .get()
+                  ->builtin_options.AsAddOptions()
+                  ->pot_scale_int16,
+              false);
+  }
+}
 
 class QuantizeArgMaxTest : public QuantizeModelTest {
  protected:
   QuantizeArgMaxTest() {
diff --git a/tensorflow/lite/tools/versioning/op_version_test.cc b/tensorflow/lite/tools/versioning/op_version_test.cc
index a90cb336318..b0723f23174 100644
--- a/tensorflow/lite/tools/versioning/op_version_test.cc
+++ b/tensorflow/lite/tools/versioning/op_version_test.cc
@@ -301,10 +301,35 @@ TEST(OpVersionTest, VersioningSumTest) {
 }
 
 TEST(OpVersionTest, VersioningAddTest) {
+  OpSignature fake_op_sig = {
+      .op = BuiltinOperator_ADD,
+      .input_types = std::vector<TensorType>{TensorType_INT16},
+      .output_types = std::vector<TensorType>{TensorType_INT16}
+  };
+  fake_op_sig.options.addsub.pot_scale_int16 = false;
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3);
+
   SimpleVersioningTest(BuiltinOperator_ADD);
 }
 
 TEST(OpVersionTest, VersioningSubTest) {
+  OpSignature fake_op_sig = {
+      .op = BuiltinOperator_SUB,
+      .input_types = std::vector<TensorType>{TensorType_INT16},
+      .output_types = std::vector<TensorType>{TensorType_INT16}
+  };
+  fake_op_sig.options.addsub.pot_scale_int16 = false;
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 5);
+
+  fake_op_sig.input_types = std::vector<TensorType>{TensorType_INT64};
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 4);
+
+  fake_op_sig.input_types = std::vector<TensorType>{TensorType_INT8};
+  fake_op_sig.output_types = std::vector<TensorType>{TensorType_INT8};
+  fake_op_sig.options.addsub.need_broadcast = true;
+  fake_op_sig.options.addsub.num_dims = 5;
+  EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3);
+
  SimpleVersioningTest(BuiltinOperator_SUB);
 }
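--

Reviewer notes (illustrative sketches, not part of the patch):

The first sketch is a minimal standalone C++ program mirroring the path
selection that Prepare() performs above. CheckedLog2Sketch stands in for the
kernels' CheckedLog2 helper and UseGeneralScaleInt16 is a hypothetical free
function; both names are ad hoc and are not defined anywhere in the tree.

    #include <cmath>
    #include <cstdio>

    // Stand-in for CheckedLog2: returns true when `scale` is (numerically
    // close to) a power of two and stores the rounded exponent.
    static bool CheckedLog2Sketch(float scale, int* log2_result) {
      const float log2_value = std::log(scale) / std::log(2.0f);
      const float rounded = std::round(log2_value);
      *log2_result = static_cast<int>(rounded);
      return std::abs(log2_value - rounded) < 1e-3f;
    }

    // Mirrors the decision in Prepare(): the POT path is kept only when the
    // builtin option asks for it AND all scales are powers of two AND all
    // zero points are zero; every other int16 case takes the general path.
    static bool UseGeneralScaleInt16(bool pot_scale_int16_option,
                                     float input1_scale, int input1_zero_point,
                                     float input2_scale, int input2_zero_point,
                                     float output_scale, int output_zero_point) {
      if (!pot_scale_int16_option) return true;
      int unused;
      const bool all_pot = CheckedLog2Sketch(input1_scale, &unused) &&
                           CheckedLog2Sketch(input2_scale, &unused) &&
                           CheckedLog2Sketch(output_scale, &unused);
      const bool all_zero_points_zero = input1_zero_point == 0 &&
                                        input2_zero_point == 0 &&
                                        output_zero_point == 0;
      return !(all_pot && all_zero_points_zero);
    }

    int main() {
      // POT scales and zero offsets: the legacy POT path is still taken.
      std::printf("%d\n", UseGeneralScaleInt16(true, 0.25f, 0, 0.5f, 0, 1.0f, 0));   // 0
      // A non-POT scale forces the general-case path.
      std::printf("%d\n", UseGeneralScaleInt16(true, 0.3f, 0, 0.5f, 0, 1.0f, 0));    // 1
      // pot_scale_int16 == false (the new default): always the general case.
      std::printf("%d\n", UseGeneralScaleInt16(false, 0.25f, 0, 0.5f, 0, 1.0f, 0));  // 1
      return 0;
    }

This is also why data->left_shift becomes 15 on the general 16-bit path:
inputs are rescaled into a 32-bit accumulator, and 65535 << 15 is still less
than 1 << 31, as the kernel comment notes.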
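The second sketch distills the versioning expectations pinned down by the new
op_version_test.cc cases. It is only a summary of those test cases, not the
actual GetBuiltinOperatorVersion implementation (the real rules live in
tensorflow/lite/tools/versioning/op_version.cc); the int8/uint8 fallbacks
follow SimpleVersioningTest's convention (int8 maps to 2, the default to 1),
and the function names and Dtype enum are ad hoc.

    #include <cassert>

    enum class Dtype { kUInt8, kInt8, kInt16, kInt64 };

    // ADD: int16 with a non-POT (general) scale resolves to version 3.
    int AddVersionSketch(Dtype input, bool pot_scale_int16) {
      if (input == Dtype::kInt16 && !pot_scale_int16) return 3;
      if (input == Dtype::kInt8) return 2;
      return 1;
    }

    // SUB: int16 with a non-POT (general) scale resolves to version 5;
    // int64 input is version 4 and 5D broadcast is version 3, as before.
    int SubVersionSketch(Dtype input, bool pot_scale_int16,
                         bool need_broadcast, int num_dims) {
      if (input == Dtype::kInt16 && !pot_scale_int16) return 5;
      if (input == Dtype::kInt64) return 4;
      if (need_broadcast && num_dims > 4) return 3;
      if (input == Dtype::kInt8) return 2;
      return 1;
    }

    int main() {
      // The same cases as VersioningAddTest/VersioningSubTest above.
      assert(AddVersionSketch(Dtype::kInt16, /*pot_scale_int16=*/false) == 3);
      assert(SubVersionSketch(Dtype::kInt16, false, false, 4) == 5);
      assert(SubVersionSketch(Dtype::kInt64, false, false, 4) == 4);
      assert(SubVersionSketch(Dtype::kInt8, false, true, 5) == 3);
      return 0;
    }

This matches the register.cc change above, which caps Register_ADD at
max_version 3: with pot_scale_int16 defaulting to false, the 16x8 ADD kernel
resolves to version 3 rather than the previously reserved version 4.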