Added non-strict mode for 16x8 quantization
parent de6afc5d6b · commit 792f553fd0
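In strict 16x8 mode (int16 activations, int8 weights) the converter fails on any op that has no 16x8 quantized kernel; the non-strict mode added here lets such ops fall back to float instead. Below is a minimal sketch of how a user would opt in from the Python API. The toy model, the representative_dataset generator, and the unprefixed OpsSet spelling (some TF versions expose it as EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8) are illustrative assumptions, not part of this diff:

import numpy as np
import tensorflow as tf

# Toy model only so the snippet is self-contained (hypothetical).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(8,)),
    tf.keras.layers.Dense(4, activation="relu"),
])

def representative_dataset():
  # Calibration data generator required for full-integer quantization.
  for _ in range(10):
    yield [np.random.rand(1, 8).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset

# Strict 16x8: list only the 16x8 ops set; conversion fails if any op lacks a
# 16x8 kernel. Non-strict (this commit): also list TFLITE_BUILTINS, which makes
# allow_float true in _calibrate_quantize_model so unsupported ops stay float32.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
    tf.lite.OpsSet.TFLITE_BUILTINS,
]
tflite_model = converter.convert()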
@@ -220,13 +220,16 @@ class TFLiteConverterBase(object):
             "type to be INT8.")

   def _is_int8_target_required(self):
-    return (set([OpsSet.TFLITE_BUILTINS_INT8]) == set(
+    return ((set([OpsSet.TFLITE_BUILTINS_INT8]) == set(
         self.target_spec.supported_ops) or
-            self._smallest_supported_type() == constants.INT8)
+             self._smallest_supported_type() == constants.INT8) and
+            not self._is_int16x8_target_required())

   def _is_int16x8_target_required(self):
-    return (set([OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8]) ==
-            set(self.target_spec.supported_ops))
+    return bool(
+        set(self.target_spec.supported_ops).intersection([
+            OpsSet.TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
+        ]))

   def _smallest_supported_type(self):
     if self.target_spec.supported_types:
@@ -262,6 +265,11 @@ class TFLiteConverterBase(object):
   def _calibrate_quantize_model(self, result, inference_input_type,
                                 inference_output_type, enable_mlir_quantizer):
     allow_float = not self._is_int8_target_required() and not self._is_int16x8_target_required()
+    if (self._is_int16x8_target_required()):
+      allow_float = bool(
+          set(self.target_spec.supported_ops).intersection([
+              OpsSet.TFLITE_BUILTINS
+          ]))
     calibrate_quantize = _calibrator.Calibrator(result)
     activations_type = constants.INT16 if self._is_int16x8_target_required() else constants.INT8
     return calibrate_quantize.calibrate_and_quantize(
@@ -245,6 +245,7 @@ tf_cc_test(
         "//tensorflow/lite/tools/optimize:testdata/maximum.bin",
         "//tensorflow/lite/tools/optimize:testdata/minimum.bin",
         "//tensorflow/lite/tools/optimize:testdata/mixed.bin",
+        "//tensorflow/lite/tools/optimize:testdata/mixed16x8.bin",
         "//tensorflow/lite/tools/optimize:testdata/multi_input_add_reshape.bin",
         "//tensorflow/lite/tools/optimize:testdata/pack.bin",
         "//tensorflow/lite/tools/optimize:testdata/single_avg_pool_min_minus_5_max_plus_5.bin",
@@ -70,6 +70,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       // ArgMax has no quantizable output.
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_AVERAGE_POOL_2D:
       property.inputs = {{0, {}}};
@@ -85,6 +86,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_SPLIT:
       // We skip input 0 since it is the split dim which is not real valued.
@@ -143,6 +145,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}, {1, {}}};
       // Comparisons have no quantizable outputs.
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_EXPAND_DIMS:
       // We skip input 1 as it is not real valued (it's the index of axis) and
@@ -165,11 +168,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_HARD_SWISH: {
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 1;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_LOG_SOFTMAX: {
@@ -180,6 +185,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       tensor_property.restricted_value_int8 = {16.0 / 256.0, 127};
       property.outputs = {{0, tensor_property}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_LOGISTIC: {
@@ -736,6 +742,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
         property.restrict_scale = {{18, 0}};
         property.version = 2;
       }
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_L2_NORMALIZATION: {
@@ -746,6 +753,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       tensor_property.restricted_value_int8 = {1 / 128.0, 0};
       property.outputs = {{0, tensor_property}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_MAX_POOL_2D:
@@ -765,6 +773,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_MINIMUM:
       property.arbitrary_inputs = true;
@@ -791,6 +800,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_QUANTIZE:
       property.inputs = {{0, {}}};
@@ -802,11 +812,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_RELU_N1_TO_1:
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 1;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_RESHAPE:
       property.inputs = {{0, {}}};
@@ -820,6 +832,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_SHAPE:
       property.inputs = {{0, {}}};
@@ -866,6 +879,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.inputs = {{0, {}}};
       property.outputs = {{0, {}}};
       property.version = 2;
+      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_TANH: {
       property.inputs = {{0, {}}};
@@ -899,6 +913,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
                          {3, tensor_property_bias}};
       property.outputs = {{0, {}}};
       property.version = 3;
+      property.quantizable_int16 = false;
       break;
     }
     case BuiltinOperator_TRANSPOSE:
@@ -916,6 +931,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
     default:
       // No quantized implementation exists for this operation.
      property.quantizable = false;
+      property.quantizable_int16 = false;
   }
   return property;
 }
@@ -65,7 +65,8 @@ struct TensorProperty {
 struct OperatorProperty {
   // Is a quantized operations currently supported.
   bool quantizable = true;
-
+  // Is a quantized operations currently supported for 16x8
+  bool quantizable_int16 = true;
   // Op has arbitrary number of inputs, such as concat.
   bool arbitrary_inputs = false;
   // Op has arbitrary number of outputs, such as slice.
@@ -43,13 +43,17 @@ namespace {
 // operator_names.
 operator_property::OperatorProperty GetOperatorProperty(
     const std::unordered_set<string>& operator_names, const ModelT* model,
-    int subgraph_index, int op_idx, const string& operator_name) {
+    int subgraph_index, int op_idx, const string& operator_name,
+    const TensorType& activations_type) {
   operator_property::OperatorProperty property =
       operator_property::GetOperatorProperty(model, subgraph_index, op_idx);
   const OperatorT* op =
       model->subgraphs[subgraph_index]->operators[op_idx].get();
   const BuiltinOperator op_code =
       model->operator_codes[op->opcode_index]->builtin_code;
+  if (activations_type == TensorType_INT16 && !property.quantizable_int16) {
+    property.quantizable = false;
+  }
   // The algorithm adds Dequantize and Quantize, so we don't require them to be
   // in the operator_names.
   if (op_code != BuiltinOperator_DEQUANTIZE &&
@@ -320,9 +324,9 @@ TfLiteStatus ApplyConstraints(ModelT* model,
     // Iterate backward to avoid messing with index.
     for (int op_idx = subgraph->operators.size() - 1; op_idx >= 0; op_idx--) {
       OperatorT* op = subgraph->operators[op_idx].get();
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
       if (!property.quantizable) {
         continue;
       }
@@ -840,11 +844,17 @@ TfLiteStatus QuantizeWeightsInputOutput(
       OperatorT* op = subgraph->operators[op_idx].get();
       const BuiltinOperator op_code =
           model->operator_codes[op->opcode_index]->builtin_code;
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);

-      if (!property.quantizable && !allow_float) {
+      if (activations_type == TensorType_INT16 && !property.quantizable &&
+          !allow_float) {
+        error_reporter->Report(
+            "Quantization to 16x8-bit not yet supported for op: %s",
+            EnumNameBuiltinOperator(op_code));
+        return kTfLiteError;
+      } else if (!property.quantizable && !allow_float) {
         error_reporter->Report("Quantization not yet supported for op: %s",
                                EnumNameBuiltinOperator(op_code));
         return kTfLiteError;
@@ -882,9 +892,9 @@ TfLiteStatus QuantizeBiases(ModelT* model,
       OperatorT* op = subgraph->operators[op_idx].get();
       const BuiltinOperator op_code =
           model->operator_codes[op->opcode_index]->builtin_code;
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);
       if (!property.quantizable) {
         continue;
       }
@@ -951,15 +961,15 @@ std::unordered_set<string> GetAllOperatorOutputs(ModelT* model) {
 // will not be filled by this function.
 TfLiteStatus FillQuantizationParams(
     ModelT* model, const std::unordered_set<string>& operator_names,
-    ErrorReporter* error_reporter) {
+    const TensorType& activations_type, ErrorReporter* error_reporter) {
   for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
        subgraph_idx++) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
       OperatorT* op = subgraph->operators[op_idx].get();
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);

       // Populate max, min for each input tensor.
       for (const std::pair<int, operator_property::TensorProperty>& input :
@@ -1048,9 +1058,9 @@ TfLiteStatus EnsureBiasScaleCompatibility(
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
       OperatorT* op = subgraph->operators[op_idx].get();
-      operator_property::OperatorProperty property =
-          GetOperatorProperty(operator_names, model, subgraph_idx, op_idx,
-                              subgraph->tensors[op->outputs[0]]->name);
+      operator_property::OperatorProperty property = GetOperatorProperty(
+          operator_names, model, subgraph_idx, op_idx,
+          subgraph->tensors[op->outputs[0]]->name, activations_type);

       // Loop over all bias tensors.
       for (const int bias_idx : property.biases) {
@@ -1174,8 +1184,8 @@ TfLiteStatus QuantizeModel(flatbuffers::FlatBufferBuilder* builder,
                            const std::unordered_set<string>& operator_names,
                            const TensorType& activations_type,
                            ErrorReporter* error_reporter) {
-  TF_LITE_ENSURE_STATUS(
-      FillQuantizationParams(model, operator_names, error_reporter));
+  TF_LITE_ENSURE_STATUS(FillQuantizationParams(
+      model, operator_names, activations_type, error_reporter));
   TF_LITE_ENSURE_STATUS(EnsureBiasScaleCompatibility(
       model, operator_names, activations_type, error_reporter));
   TF_LITE_ENSURE_STATUS(
@@ -1308,7 +1308,8 @@ TEST_F(QuantizeFCTest, VerifyFC) {
   EXPECT_EQ(model_.operator_codes[1]->version, 1);
 }

-class QuantizeCustomOpTest : public QuantizeModelTest {
+class QuantizeCustomOpTest : public QuantizeModelTest,
+                             public ::testing::WithParamInterface<tflite::TensorType> {
  protected:
   QuantizeCustomOpTest() {
     input_model_ = ReadModel(internal::kModelMixed);
@@ -1317,10 +1318,10 @@ class QuantizeCustomOpTest : public QuantizeModelTest {
   }
 };

-TEST_F(QuantizeCustomOpTest, VerifyMixedQuantization) {
+TEST_P(QuantizeCustomOpTest, VerifyMixedQuantization) {
   auto status =
-      QuantizeModel(&builder_, &model_, TensorType_INT8, TensorType_INT8,
-                    /*allow_float=*/true, TensorType_INT8, &error_reporter_);
+      QuantizeModel(&builder_, &model_, GetParam(), GetParam(),
+                    /*allow_float=*/true, GetParam(), &error_reporter_);
   ASSERT_EQ(kTfLiteOk, status);
   const auto& subgraph = model_.subgraphs[0];
   auto float_graph = readonly_model_->subgraphs()->Get(0);
@@ -1334,8 +1335,45 @@ TEST_F(QuantizeCustomOpTest, VerifyMixedQuantization) {
       BuiltinOperator_CUSTOM, BuiltinOperator_CUSTOM,
       BuiltinOperator_QUANTIZE, BuiltinOperator_SQUEEZE};
   const std::vector<TensorType> op_input_types = {
-      TensorType_INT8, TensorType_INT8, TensorType_FLOAT32,
-      TensorType_FLOAT32, TensorType_FLOAT32, TensorType_INT8};
+      GetParam(), GetParam(), TensorType_FLOAT32,
+      TensorType_FLOAT32, TensorType_FLOAT32, GetParam()};
+  for (int i = 0; i < subgraph->operators.size(); ++i) {
+    OperatorT* op = subgraph->operators[i].get();
+    ASSERT_EQ(model_.operator_codes[op->opcode_index]->builtin_code,
+              op_codes[i]);
+    ASSERT_EQ(subgraph->tensors[op->inputs[0]]->type, op_input_types[i]);
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(QuantizeCustomOpTest, QuantizeCustomOpTest,
+                         ::testing::Values(TensorType_INT8, TensorType_INT16));
+
+class QuantizeOp16x8Test : public QuantizeModelTest {
+ protected:
+  QuantizeOp16x8Test() {
+    input_model_ = ReadModel(internal::kModelMixed16x8);
+    readonly_model_ = input_model_->GetModel();
+    readonly_model_->UnPackTo(&model_);
+  }
+};
+
+TEST_F(QuantizeOp16x8Test, VerifyMixedQuantization16x8) {
+  auto status =
+      QuantizeModel(&builder_, &model_, TensorType_INT16, TensorType_FLOAT32,
+                    /*allow_float=*/true, TensorType_INT16, &error_reporter_);
+  ASSERT_EQ(kTfLiteOk, status);
+  const auto& subgraph = model_.subgraphs[0];
+  auto float_graph = readonly_model_->subgraphs()->Get(0);
+  // The original model conv_2d->log_softmax
+  ASSERT_EQ(float_graph->operators()->size(), 2);
+  // The resulting model should be:
+  // conv_2d->dequantize->log_softmax
+  ASSERT_EQ(subgraph->operators.size(), 3);
+  const std::vector<BuiltinOperator> op_codes = {
+      BuiltinOperator_CONV_2D, BuiltinOperator_DEQUANTIZE,
+      BuiltinOperator_LOG_SOFTMAX};
+  const std::vector<TensorType> op_input_types = {
+      TensorType_INT16, TensorType_INT16, TensorType_FLOAT32};
   for (int i = 0; i < subgraph->operators.size(); ++i) {
     OperatorT* op = subgraph->operators[i].get();
     ASSERT_EQ(model_.operator_codes[op->opcode_index]->builtin_code,
@@ -48,6 +48,7 @@ const char* kModelWithArgMaxOp = "argmax.bin";
 const char* kModelWithFCOp = "fc.bin";

 const char* kModelMixed = "mixed.bin";
+const char* kModelMixed16x8 = "mixed16x8.bin";

 const char* kModelSplit = "split.bin";

@@ -76,6 +76,11 @@ extern const char* kModelWithFCOp;
 // reshape->custom->custom->squeeze.
 extern const char* kModelMixed;

+// Test model with mixed quantizable and
+// and un-quantizable ops for
+// activations in 16-bit.
+extern const char* kModelMixed16x8;
+
 // Test model with split op.
 extern const char* kModelSplit;

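The new mixed16x8.bin testdata referenced below is a checked-in binary, and the script that generated it is not part of this diff. For orientation only, here is a rough, hypothetical Python sketch of a float model with the conv_2d -> log_softmax structure the test describes; shapes, layer options, and op lowering may differ from the real testdata:

import tensorflow as tf

# Hypothetical stand-in for the mixed16x8 test model: a single Conv2D followed
# by log_softmax, where log_softmax has no 16x8 quantized kernel.
inputs = tf.keras.layers.Input(shape=(5, 5, 3))
x = tf.keras.layers.Conv2D(filters=2, kernel_size=3)(inputs)
outputs = tf.nn.log_softmax(x)
model = tf.keras.Model(inputs, outputs)

# Export an unquantized float .tflite file analogous to the testdata input.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
with open("mixed16x8_like.bin", "wb") as f:
  f.write(converter.convert())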
|
BIN
tensorflow/lite/tools/optimize/testdata/mixed16x8.bin
vendored
Normal file
BIN
tensorflow/lite/tools/optimize/testdata/mixed16x8.bin
vendored
Normal file
Binary file not shown.