From 792f553fd078a425d66c81567ca8f3588d44fcdc Mon Sep 17 00:00:00 2001
From: Elena Zhelezina
Date: Wed, 5 Feb 2020 11:55:27 +0000
Subject: [PATCH] Added non-strict mode for 16x8 quantization

---
 tensorflow/lite/python/lite.py                |  16 ++++--
 tensorflow/lite/tools/optimize/BUILD          |   1 +
 .../lite/tools/optimize/operator_property.cc  |  16 ++++++
 .../lite/tools/optimize/operator_property.h   |   3 +-
 .../lite/tools/optimize/quantize_model.cc     |  50 +++++++++++-------
 .../tools/optimize/quantize_model_test.cc     |  50 +++++++++++++++---
 tensorflow/lite/tools/optimize/test_util.cc   |   1 +
 tensorflow/lite/tools/optimize/test_util.h    |   5 ++
 .../tools/optimize/testdata/mixed16x8.bin     | Bin 0 -> 1184 bytes
 9 files changed, 111 insertions(+), 31 deletions(-)
 create mode 100644 tensorflow/lite/tools/optimize/testdata/mixed16x8.bin

diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index fc9c064faf0..1e0c89d3aa5 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -220,13 +220,16 @@ class TFLiteConverterBase(object):
           "type to be INT8.")
 
   def _is_int8_target_required(self):
-    return (set([OpsSet.TFLITE_BUILTINS_INT8]) == set(
+    return ((set([OpsSet.TFLITE_BUILTINS_INT8]) == set(
         self.target_spec.supported_ops) or
-            self._smallest_supported_type() == constants.INT8)
+            self._smallest_supported_type() == constants.INT8) and
+            not self._is_int16x8_target_required())
 
   def _is_int16x8_target_required(self):
-    return (set([OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]) ==
-            set(self.target_spec.supported_ops))
+    return bool(
+        set(self.target_spec.supported_ops).intersection([
+            OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+        ]))
 
   def _smallest_supported_type(self):
     if self.target_spec.supported_types:
@@ -262,6 +265,11 @@ class TFLiteConverterBase(object):
   def _calibrate_quantize_model(self, result, inference_input_type,
                                 inference_output_type, enable_mlir_quantizer):
     allow_float = not self._is_int8_target_required() and not self._is_int16x8_target_required()
+    if (self._is_int16x8_target_required()):
+      allow_float = bool(
+          set(self.target_spec.supported_ops).intersection([
+              OpsSet.TFLITE_BUILTINS
+          ]))
     calibrate_quantize = _calibrator.Calibrator(result)
     activations_type = constants.INT16 if self._is_int16x8_target_required() else constants.INT8
     return calibrate_quantize.calibrate_and_quantize(
diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD
index 27be0f829ba..ee5e845b96b 100644
--- a/tensorflow/lite/tools/optimize/BUILD
+++ b/tensorflow/lite/tools/optimize/BUILD
@@ -245,6 +245,7 @@ tf_cc_test(
         "//tensorflow/lite/tools/optimize:testdata/maximum.bin",
         "//tensorflow/lite/tools/optimize:testdata/minimum.bin",
         "//tensorflow/lite/tools/optimize:testdata/mixed.bin",
+        "//tensorflow/lite/tools/optimize:testdata/mixed16x8.bin",
         "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
         "//tensorflow/lite/tools/optimize:testdata/pack.bin",
         "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc
index 1f2d8bb4a4d..c31ad9dbb1e 100644
--- a/tensorflow/lite/tools/optimize/operator_property.cc
+++ b/tensorflow/lite/tools/optimize/operator_property.cc
@@ -70,6 +70,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       // ArgMax has no quantizable output.
      property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_AVERAGE_POOL_2D:
       property.inputs = {{0, {}}};
@@ -85,6 +86,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_SPLIT:
       // We skip input 0 since it is the split dim which is not real valued.
@@ -143,6 +145,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}, {1, {}}};
       // Comparisons have no quantizable outputs.
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_EXPAND_DIMS:
       // We skip input 1 as it is not real valued (it's the index of axis) and
@@ -165,11 +168,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_HARD_SWISH: {
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 1;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_LOG_SOFTMAX: {
@@ -180,6 +185,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       tensor_property.restricted_value_int8 = {16.0 / 256.0, 127};
       property.outputs = {{0, tensor_property}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_LOGISTIC: {
@@ -736,6 +742,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
         property.restrict_scale = {{18, 0}};
         property.version = 2;
       }
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_L2_NORMALIZATION: {
@@ -746,6 +753,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       tensor_property.restricted_value_int8 = {1 / 128.0, 0};
       property.outputs = {{0, tensor_property}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_MAX_POOL_2D:
@@ -765,6 +773,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_MINIMUM:
       property.arbitrary_inputs = true;
@@ -791,6 +800,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_QUANTIZE:
       property.inputs = {{0, {}}};
@@ -802,11 +812,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_RELU_N1_TO_1:
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 1;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_RESHAPE:
       property.inputs = {{0, {}}};
@@ -820,6 +832,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_SHAPE:
       property.inputs = {{0, {}}};
@@ -866,6 +879,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_TANH: {
       property.inputs = {{0, {}}};
@@ -899,6 +913,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
                          {3, tensor_property_bias}};
       property.outputs = {{0, {}}};
       property.version = 3;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_TRANSPOSE:
@@ -916,6 +931,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
     default:
       // No quantized implementation exists for this operation.
       property.quantizable = false;
+      property.quantizable_int16 = false;
   }
   return property;
 }
diff --git a/tensorflow/lite/tools/optimize/operator_property.h b/tensorflow/lite/tools/optimize/operator_property.h
index 23052308568..151e314f335 100644
--- a/tensorflow/lite/tools/optimize/operator_property.h
+++ b/tensorflow/lite/tools/optimize/operator_property.h
@@ -65,7 +65,8 @@ struct TensorProperty {
 struct OperatorProperty {
   // Is a quantized operations currently supported.
   bool quantizable = true;
-
+  // Is the quantized operation currently supported for 16x8 activations.
+  bool quantizable_int16 = true;
   // Op has arbitrary number of inputs, such as concat.
   bool arbitrary_inputs = false;
   // Op has arbitrary number of outputs, such as slice.
diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc
index bbb40080fbc..ceae3c29d9e 100644
--- a/tensorflow/lite/tools/optimize/quantize_model.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model.cc
@@ -43,13 +43,17 @@ namespace {
 // operator_names.
 operator_property::OperatorProperty GetOperatorProperty(
     const std::unordered_set<string>& operator_names, const ModelT* model,
-    int subgraph_index, int op_idx, const string& operator_name) {
+    int subgraph_index, int op_idx, const string& operator_name,
+    const TensorType& activations_type) {
   operator_property::OperatorProperty property =
       operator_property::GetOperatorProperty(model, subgraph_index, op_idx);
   const OperatorT* op =
       model->subgraphs[subgraph_index]->operators[op_idx].get();
   const BuiltinOperator op_code =
       model->operator_codes[op->opcode_index]->builtin_code;
+  if (activations_type == TensorType_INT16 && !property.quantizable_int16) {
+    property.quantizable = false;
+  }
   // The algorithm adds Dequantize and Quantize, so we don't require them to be
   // in the operator_names.
   if (op_code != BuiltinOperator_DEQUANTIZE &&
@@ -320,9 +324,9 @@ TfLiteStatus ApplyConstraints(ModelT* model,
     // Iterate backward to avoid messing with index.
     for (int op_idx = subgraph->operators.size() - 1; op_idx >= 0; op_idx--) {
       OperatorT* op = subgraph->operators[op_idx].get();
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
       if (!property.quantizable) {
         continue;
       }
@@ -840,11 +844,17 @@ TfLiteStatus QuantizeWeightsInputOutput(
       OperatorT* op = subgraph->operators[op_idx].get();
       const BuiltinOperator op_code =
           model->operator_codes[op->opcode_index]->builtin_code;
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
 
-      if (!property.quantizable && !allow_float) {
+      if (activations_type == TensorType_INT16 && !property.quantizable &&
+          !allow_float) {
+        error_reporter->Report(
+            "Quantization to 16x8-bit not yet supported for op: %s",
+            EnumNameBuiltinOperator(op_code));
+        return kTfLiteError;
+      } else if (!property.quantizable && !allow_float) {
         error_reporter->Report("Quantization not yet supported for op: %s",
                                EnumNameBuiltinOperator(op_code));
         return kTfLiteError;
@@ -882,9 +892,9 @@ TfLiteStatus QuantizeBiases(ModelT* model,
       OperatorT* op = subgraph->operators[op_idx].get();
       const BuiltinOperator op_code =
           model->operator_codes[op->opcode_index]->builtin_code;
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
       if (!property.quantizable) {
         continue;
       }
@@ -951,15 +961,15 @@ std::unordered_set<string> GetAllOperatorOutputs(ModelT* model) {
 // will not be filled by this function.
 TfLiteStatus FillQuantizationParams(
     ModelT* model, const std::unordered_set<string>& operator_names,
-    ErrorReporter* error_reporter) {
+    const TensorType& activations_type, ErrorReporter* error_reporter) {
   for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
        subgraph_idx++) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
       OperatorT* op = subgraph->operators[op_idx].get();
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
 
       // Populate max, min for each input tensor.
       for (const std::pair<int, operator_property::TensorProperty>& input :
@@ -1048,9 +1058,9 @@ TfLiteStatus EnsureBiasScaleCompatibility(
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
       OperatorT* op = subgraph->operators[op_idx].get();
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
 
       // Loop over all bias tensors.
       for (const int bias_idx : property.biases) {
@@ -1174,8 +1184,8 @@ TfLiteStatus QuantizeModel(flatbuffers::FlatBufferBuilder* builder,
                            const std::unordered_set<string>& operator_names,
                            const TensorType& activations_type,
                            ErrorReporter* error_reporter) {
-  TF_LITE_ENSURE_STATUS(
-      FillQuantizationParams(model, operator_names, error_reporter));
+  TF_LITE_ENSURE_STATUS(FillQuantizationParams(
+      model, operator_names, activations_type, error_reporter));
   TF_LITE_ENSURE_STATUS(EnsureBiasScaleCompatibility(
       model, operator_names, activations_type, error_reporter));
   TF_LITE_ENSURE_STATUS(
diff --git a/tensorflow/lite/tools/optimize/quantize_model_test.cc b/tensorflow/lite/tools/optimize/quantize_model_test.cc
index ef46b3fbd5d..b73cb9a79ca 100644
--- a/tensorflow/lite/tools/optimize/quantize_model_test.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model_test.cc
@@ -1308,7 +1308,8 @@ TEST_F(QuantizeFCTest, VerifyFC) {
   EXPECT_EQ(model_.operator_codes[1]->version, 1);
 }
 
-class QuantizeCustomOpTest : public QuantizeModelTest {
+class QuantizeCustomOpTest : public QuantizeModelTest,
+                             public ::testing::WithParamInterface<TensorType> {
  protected:
   QuantizeCustomOpTest() {
     input_model_ = ReadModel(internal::kModelMixed);
@@ -1317,10 +1318,10 @@ class QuantizeCustomOpTest : public QuantizeModelTest {
   }
 };
 
-TEST_F(QuantizeCustomOpTest, VerifyMixedQuantization) {
+TEST_P(QuantizeCustomOpTest, VerifyMixedQuantization) {
   auto status =
-      QuantizeModel(&builder_, &model_, TensorType_INT8, TensorType_INT8,
-                    /*allow_float=*/true, TensorType_INT8, &error_reporter_);
+      QuantizeModel(&builder_, &model_, GetParam(), GetParam(),
+                    /*allow_float=*/true, GetParam(), &error_reporter_);
   ASSERT_EQ(kTfLiteOk, status);
   const auto& subgraph = model_.subgraphs[0];
   auto float_graph = readonly_model_->subgraphs()->Get(0);
@@ -1334,8 +1335,45 @@ TEST_F(QuantizeCustomOpTest, VerifyMixedQuantization) {
       BuiltinOperator_CUSTOM, BuiltinOperator_CUSTOM,
       BuiltinOperator_QUANTIZE, BuiltinOperator_SQUEEZE};
   const std::vector<TensorType> op_input_types = {
-      TensorType_INT8, TensorType_INT8, TensorType_FLOAT32,
-      TensorType_FLOAT32, TensorType_FLOAT32, TensorType_INT8};
+      GetParam(), GetParam(), TensorType_FLOAT32,
+      TensorType_FLOAT32, TensorType_FLOAT32, GetParam()};
+  for (int i = 0; i < subgraph->operators.size(); ++i) {
+    OperatorT* op = subgraph->operators[i].get();
+    ASSERT_EQ(model_.operator_codes[op->opcode_index]->builtin_code,
+              op_codes[i]);
+    ASSERT_EQ(subgraph->tensors[op->inputs[0]]->type, op_input_types[i]);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(QuantizeCustomOpTest, QuantizeCustomOpTest,
+                         ::testing::Values(TensorType_INT8, TensorType_INT16));
+
+class QuantizeOp16x8Test : public QuantizeModelTest {
+ protected:
+  QuantizeOp16x8Test() {
+    input_model_ = ReadModel(internal::kModelMixed16x8);
+    readonly_model_ = input_model_->GetModel();
+    readonly_model_->UnPackTo(&model_);
+  }
+};
+
+TEST_F(QuantizeOp16x8Test, VerifyMixedQuantization16x8) {
+  auto status =
+      QuantizeModel(&builder_, &model_, TensorType_INT16, TensorType_FLOAT32,
+                    /*allow_float=*/true, TensorType_INT16, &error_reporter_);
+  ASSERT_EQ(kTfLiteOk, status);
+  const auto& subgraph = model_.subgraphs[0];
+  auto float_graph = readonly_model_->subgraphs()->Get(0);
+  // The original model: conv_2d->log_softmax.
+  ASSERT_EQ(float_graph->operators()->size(), 2);
+  // The resulting model should be:
+  // conv_2d->dequantize->log_softmax
+  ASSERT_EQ(subgraph->operators.size(), 3);
+  const std::vector<BuiltinOperator> op_codes = {
+      BuiltinOperator_CONV_2D, BuiltinOperator_DEQUANTIZE,
+      BuiltinOperator_LOG_SOFTMAX};
+  const std::vector<TensorType> op_input_types = {
+      TensorType_INT16, TensorType_INT16, TensorType_FLOAT32};
   for (int i = 0; i < subgraph->operators.size(); ++i) {
     OperatorT* op = subgraph->operators[i].get();
     ASSERT_EQ(model_.operator_codes[op->opcode_index]->builtin_code,
diff --git a/tensorflow/lite/tools/optimize/test_util.cc b/tensorflow/lite/tools/optimize/test_util.cc
index 7d5e9d65f06..379be64059f 100644
--- a/tensorflow/lite/tools/optimize/test_util.cc
+++ b/tensorflow/lite/tools/optimize/test_util.cc
@@ -48,6 +48,7 @@ const char* kModelWithArgMaxOp = "argmax.bin";
 
 const char* kModelWithFCOp = "fc.bin";
 
 const char* kModelMixed = "mixed.bin";
+const char* kModelMixed16x8 = "mixed16x8.bin";
 
 const char* kModelSplit = "split.bin";
diff --git a/tensorflow/lite/tools/optimize/test_util.h b/tensorflow/lite/tools/optimize/test_util.h
index abcdbc21d36..a49f3500288 100644
--- a/tensorflow/lite/tools/optimize/test_util.h
+++ b/tensorflow/lite/tools/optimize/test_util.h
@@ -76,6 +76,11 @@ extern const char* kModelWithFCOp;
 // reshape->custom->custom->squeeze.
 extern const char* kModelMixed;
 
+// Test model with mixed quantizable and
+// un-quantizable ops for
+// activations in 16-bit.
+extern const char* kModelMixed16x8;
+
 // Test model with split op.
 extern const char* kModelSplit;
diff --git a/tensorflow/lite/tools/optimize/testdata/mixed16x8.bin b/tensorflow/lite/tools/optimize/testdata/mixed16x8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c1f615e966eb164341d8ba4d56e1e359ef516388
GIT binary patch
literal 1184
zcmb1PU|hJk@Whk=1Xg@J(qWH-pHr2qf_gJ_T$hI)o3dIk&(3IG59Px=4cJI#2x6#}#XYV(I
z+0J^akG-Ncll|PMa(3Mu8TRL0SnTgR%h)@mnb}(lm)YNIw6YgxVzg)bKG|+RlZpKt
zJw5xz<5l)73~XS3f_%oo0OCRX4RR*~*zYW0d45p%_&fV6IOpe;r52T>rs#pgMu35V
zfrmi^9-=%93=C%&7#I#PFfeQZ>t|qKVqjp10EaXKg9uzL$aD~f!~!UsCBSAu><7t!
z#9%atEy4g&0g5?x29UiF(?BX17%iaT4N?JfI|Bn3g8&0NIIg+C$`}|y<{)EGyo1z(
z+z4WW
zD#KjsMVOIGd{Sm&G1PpJ-Aw=g{|BWtklir*
zzTmOXnne5bgHjVy!V-%z6O(dM{R`lJ2H6M0ZU6uOPhenR0EGd}4Hf@!hao6Gk>Y1i
zJi_7vW+%w+3=DkWRLB5M83GJ^3=9k|43KmRvICTcL8%N>N`Ue_t~dqBd8VZ1m1LGw
i;<5{ru0Z)8WF{z&u`#fM%Mb=5HU
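
Usage sketch (not part of the diff): how a converter user would request the non-strict 16x8 mode this patch introduces. This is a hedged illustration under assumptions: `build_keras_model()` and `calibration_examples` are hypothetical placeholders, and the exact public name of the 16x8 OpsSet member may differ between TF versions (later releases expose it with an EXPERIMENTAL_ prefix). Listing TFLITE_BUILTINS alongside the 16x8 op set is what makes _calibrate_quantize_model above pass allow_float=True, so ops without a 16x8 kernel stay in float instead of failing conversion.

    # Sketch only, assuming the TF2 converter API touched in lite.py above.
    import tensorflow as tf

    def representative_dataset():
      # Yield a handful of calibration samples matching the model input shape.
      for example in calibration_examples:  # hypothetical calibration data
        yield [example]

    converter = tf.lite.TFLiteConverter.from_keras_model(build_keras_model())
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    # Strict mode would list only the 16x8 op set and error out on unsupported
    # ops. Adding TFLITE_BUILTINS selects the non-strict mode added here: ops
    # without a 16x8 kernel are left in float.
    converter.target_spec.supported_ops = [
        tf.lite.OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
        tf.lite.OpsSet.TFLITE_BUILTINS,
    ]
    tflite_model = converter.convert()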