Support unknown dimensions in quantized models.
PiperOrigin-RevId: 310269161
Change-Id: I83b58ec5d8128485bbed9839d60478e543a7c09e

parent 09d304b9f3
commit 10f4282cff
@@ -346,15 +346,9 @@ class TFLiteConverterBase(object):
           self.representative_dataset.input_gen, inference_input_type,
           inference_output_type, allow_float)
 
-  def _is_unknown_shapes_allowed(self, fp32_execution):
-    # TODO(b/128319310): Investigate which quantization methods work.
-    if not fp32_execution:
-      return False
-
+  def _is_unknown_shapes_allowed(self):
     # Unknown dimensions are only allowed with the new converter.
-    if not self.experimental_new_converter:
-      return False
-    return True
+    return self.experimental_new_converter
 
   def _get_base_converter_args(self):
     """Returns the base converter args.
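Note: for context, a minimal usage sketch of what this gate means in practice, mirroring the new tests added below; the model and variable names are illustrative and not part of this change. Converting a model whose input has an unknown (None) dimension only works when the new converter is enabled.

    import tensorflow as tf

    @tf.function(
        input_signature=[tf.TensorSpec(shape=[None, 33], dtype=tf.float32)])
    def model(in_tensor):
      # Unknown batch dimension in the input signature.
      return tf.matmul(in_tensor, tf.ones([33, 33]))

    converter = tf.lite.TFLiteConverter.from_concrete_functions(
        [model.get_concrete_function()])
    converter.experimental_new_converter = True  # required for unknown dims
    tflite_model = converter.convert()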
@@ -657,7 +651,7 @@ class TFLiteConverterV2(TFLiteConverterBase):
     quant_mode = QuantizationMode(self.optimizations, self.target_spec,
                                   self.representative_dataset, graph_def)
 
-    if not self._is_unknown_shapes_allowed(quant_mode.fp32_execution()):
+    if not self._is_unknown_shapes_allowed():
       # Checks dimensions in input tensor.
       for tensor in input_tensors:
         # Note that shape_list might be empty for scalar shapes.
@@ -1197,8 +1191,7 @@ class TFLiteConverter(TFLiteConverterBase):
                                   self.representative_dataset, self._graph_def)
 
     # Checks dimensions in input tensor.
-    if (not self._is_unknown_shapes_allowed(quant_mode.fp32_execution()) and
-        self._has_valid_tensors()):
+    if (not self._is_unknown_shapes_allowed() and self._has_valid_tensors()):
       for tensor in self._input_tensors:
         shape = tensor.shape
         if not shape:
@@ -1399,13 +1392,12 @@ class TFLiteConverter(TFLiteConverterBase):
         shape[0] = batch_size
         tensor.set_shape(shape)
 
-  def _is_unknown_shapes_allowed(self, fp32_execution):
+  def _is_unknown_shapes_allowed(self):
     # Ophint Converted nodes will need the shapes to be known.
     if _is_ophint_converted(self._graph_def):
       return False
 
-    if not super(TFLiteConverter,
-                 self)._is_unknown_shapes_allowed(fp32_execution):
+    if not super(TFLiteConverter, self)._is_unknown_shapes_allowed():
       return False
 
     # `conversion_summary_dir` calls TOCO. Unknown shapes are only supported by
@@ -876,6 +876,84 @@ class UnknownShapes(lite_v2_test_util.ModelTest):
     np.testing.assert_almost_equal(
         expected_value.numpy(), actual_value[0], decimal=6)
 
+  def _getQuantizedModel(self):
+    # Returns a model with tf.MatMul and unknown dimensions.
+    @tf.function(
+        input_signature=[tf.TensorSpec(shape=[None, 33], dtype=tf.float32)])
+    def model(in_tensor):
+      # We need the tensor to have more than 1024 elements for quantize_weights
+      # to kick in. Thus, the [33, 33] shape.
+      const_tensor = tf.constant(
+          np.random.uniform(low=-10., high=10., size=[33, 33]),
+          shape=[33, 33],
+          dtype=tf.float32,
+          name='inputB')
+
+      shape = tf.shape(in_tensor)
+      fill = tf.transpose(tf.fill(shape, 1.))
+      mult = tf.matmul(fill, in_tensor)
+      return tf.matmul(mult, const_tensor)
+
+    concrete_func = model.get_concrete_function()
+
+    def calibration_gen():
+      for batch in range(5, 20, 5):
+        for _ in range(5):
+          yield [np.random.uniform(-1, 1, size=(batch, 33)).astype(np.float32)]
+
+    return concrete_func, calibration_gen
+
+  @test_util.run_v2_only
+  def testMatMulQuantize(self):
+    concrete_func, _ = self._getQuantizedModel()
+    float_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    float_converter.experimental_new_converter = True
+    float_tflite_model = float_converter.convert()
+
+    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    quantized_converter.experimental_new_converter = True
+    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
+    quantized_tflite_model = quantized_converter.convert()
+
+    # The default input and output types should be float.
+    quantized_interpreter = Interpreter(model_content=quantized_tflite_model)
+    quantized_interpreter.allocate_tensors()
+    input_details = quantized_interpreter.get_input_details()
+    self.assertLen(input_details, 1)
+    self.assertEqual(np.float32, input_details[0]['dtype'])
+    self.assertTrue((input_details[0]['shape_signature'] == [-1, 33]).all())
+
+    # Ensure that the quantized weights tflite model is smaller.
+    self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
+
+  @test_util.run_v2_only
+  def testMatMulCalibrateAndQuantize(self):
+    concrete_func, calibration_gen = self._getQuantizedModel()
+    float_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    float_converter.experimental_new_converter = True
+    float_tflite_model = float_converter.convert()
+
+    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions(
+        [concrete_func])
+    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
+    quantized_converter.representative_dataset = calibration_gen
+    quantized_converter.experimental_new_converter = True
+    quantized_tflite_model = quantized_converter.convert()
+
+    # The default input and output types should be float.
+    quantized_interpreter = Interpreter(model_content=quantized_tflite_model)
+    quantized_interpreter.allocate_tensors()
+    input_details = quantized_interpreter.get_input_details()
+    self.assertLen(input_details, 1)
+    self.assertEqual(np.float32, input_details[0]['dtype'])
+    self.assertTrue((input_details[0]['shape_signature'] == [-1, 33]).all())
+
+    # Ensure that the quantized weights tflite model is smaller.
+    self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
+
   def testBatchMatMul(self):
     input_data_1 = tf.constant(
         np.array(np.random.random_sample((1, 256, 256)), dtype=np.float32))
@@ -53,10 +53,12 @@ class ModelTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     for idx, (shape_signature, final_shape) in enumerate(input_shapes):
       self.assertTrue(
           (input_details[idx]['shape_signature'] == shape_signature).all())
-      interpreter.resize_tensor_input(idx, final_shape, strict=True)
+      index = input_details[idx]['index']
+      interpreter.resize_tensor_input(index, final_shape, strict=True)
     interpreter.allocate_tensors()
 
     output_details = interpreter.get_output_details()
+    input_details = interpreter.get_input_details()
 
     for input_tensor, tensor_data in zip(input_details, input_data):
      interpreter.set_tensor(input_tensor['index'], tensor_data.numpy())
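Note: a sketch of running such a model once converted, resolving the tensor index through get_input_details() as the fix above does; it assumes `tflite_model` from the earlier sketch, and the shapes are illustrative.

    import numpy as np
    import tensorflow as tf

    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    input_details = interpreter.get_input_details()
    # Resize the unknown batch dimension to a concrete value before allocating.
    interpreter.resize_tensor_input(
        input_details[0]['index'], [8, 33], strict=True)
    interpreter.allocate_tensors()
    interpreter.set_tensor(
        input_details[0]['index'], np.ones((8, 33), dtype=np.float32))
    interpreter.invoke()
    output_details = interpreter.get_output_details()
    result = interpreter.get_tensor(output_details[0]['index'])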
@@ -218,16 +218,33 @@ PyObject* CalibrationWrapper::SetTensor(int index, PyObject* value) {
     return nullptr;
   }
 
+  std::vector<int> dims(PyArray_NDIM(array));
+  bool has_unknown_dims = false;
   for (int j = 0; j < PyArray_NDIM(array); j++) {
-    if (tensor->dims->data[j] != PyArray_SHAPE(array)[j]) {
+    // Ensure the calibration data input shape is the same as the model input
+    // shape unless the dimension is unknown.
+    if (tensor->dims_signature->size == tensor->dims->size &&
+        tensor->dims_signature->data[j] == -1) {
+      has_unknown_dims = true;
+    } else if (tensor->dims->data[j] != PyArray_SHAPE(array)[j]) {
       PyErr_Format(PyExc_ValueError,
                    "Cannot set tensor: Size mismatch, expected %d for dim "
                    "%d but found %ld",
                    tensor->dims->data[j], j, PyArray_SHAPE(array)[j]);
       return nullptr;
     }
+    dims[j] = PyArray_SHAPE(array)[j];
   }
 
+  // Resize the input tensor if there are unknown dimensions.
+  if (has_unknown_dims) {
+    // Does strict checking on the `ResizeInputTensor` call.
+    TFLITE_PY_CHECK(interpreter_->ResizeInputTensorStrict(index, dims));
+    TFLITE_PY_CHECK(interpreter_->AllocateTensors());
+  }
+
+  tensor = interpreter_->tensor(index);
+
   size_t size = PyArray_NBYTES(array);
   if (size != tensor->bytes) {
     PyErr_Format(PyExc_ValueError,
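Note: this is the change that lets the calibrator accept calibration samples whose unknown dimensions differ from the model's default shape. On the Python side a representative dataset can then yield batches of varying size; a sketch, assuming the quantizing `converter` from the earlier sketches.

    import numpy as np
    import tensorflow as tf

    def representative_dataset():
      # Each sample keeps the known dim (33) but varies the unknown batch dim.
      for batch in (5, 10, 15):
        for _ in range(5):
          yield [np.random.uniform(-1, 1, size=(batch, 33)).astype(np.float32)]

    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    quantized_model = converter.convert()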
@@ -424,7 +424,8 @@ def test_frozen_graph_quant(filename,
 
   # Convert and load the quantized model.
   converter = _lite.TFLiteConverter.from_frozen_graph(filename, input_arrays,
-                                                      output_arrays)
+                                                      output_arrays,
+                                                      input_shapes)
   tflite_model_quant = _convert(
       converter, post_training_quantize=True, **kwargs)
 
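Note: the added argument matches the public TF1 converter entry point, where input_shapes is an optional dict from input array name to shape. A hedged sketch with illustrative names and path:

    import tensorflow as tf

    converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
        'frozen_graph.pb',                # illustrative path
        input_arrays=['input'],
        output_arrays=['output'],
        input_shapes={'input': [1, 33]})  # optional; keyed by input name
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model_quant = converter.convert()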
@@ -77,10 +77,14 @@ void MakeQuantizeOperator(ModelT* model, std::unique_ptr<OperatorT>* op,
 
 // Create a new TensorT object without quantization parameters.
 void MakeTensor(const string& name, const std::vector<int32_t>& shape,
+                const std::vector<int32_t>& shape_signature,
                 const TensorType& type, std::unique_ptr<TensorT>* tensor) {
   TensorT* tensor_raw = new TensorT;
   tensor_raw->name = name;
   tensor_raw->shape = shape;
+  if (!shape_signature.empty()) {
+    tensor_raw->shape_signature = shape_signature;
+  }
   tensor_raw->type = type;
 
   tensor->reset(tensor_raw);
@@ -89,10 +93,11 @@ void MakeTensor(const string& name, const std::vector<int32_t>& shape,
 // Create a new TensorT object with quantization parameters.
 void MakeTensorWithQuantParam(const string& name,
                               const std::vector<int32_t>& shape,
+                              const std::vector<int32_t>& shape_signature,
                               const TensorType& type, float scale,
                               int64_t zero_point,
                               std::unique_ptr<TensorT>* tensor) {
-  MakeTensor(name, shape, type, tensor);
+  MakeTensor(name, shape, shape_signature, type, tensor);
   (*tensor)->quantization = absl::make_unique<QuantizationParametersT>();
   (*tensor)->quantization->scale.push_back(scale);
   (*tensor)->quantization->zero_point.push_back(zero_point);
@@ -34,11 +34,13 @@ void MakeQuantizeOperator(ModelT* model, std::unique_ptr<OperatorT>* op,
 
 // Create a new TensorT object without quantization parameters.
 void MakeTensor(const string& name, const std::vector<int32_t>& shape,
+                const std::vector<int32_t>& shape_signature,
                 const TensorType& type, std::unique_ptr<TensorT>* tensor);
 
 // Create a new TensorT object with quantization parameters.
 void MakeTensorWithQuantParam(const string& name,
                               const std::vector<int32_t>& shape,
+                              const std::vector<int32_t>& shape_signature,
                               const TensorType& type, float scale,
                               int64_t zero_point,
                               std::unique_ptr<TensorT>* tensor);
@@ -383,9 +383,9 @@ void AddUint8Dequant(
   const std::pair<float, int32_t>& provided_quant_params =
       quant_params.at(string(tensor->name));
   utils::MakeTensorWithQuantParam(
-      added_tensor_name, tensor->shape, TensorType_UINT8,
-      provided_quant_params.first, provided_quant_params.second,
-      &leading_op_input);
+      added_tensor_name, tensor->shape, tensor->shape_signature,
+      TensorType_UINT8, provided_quant_params.first,
+      provided_quant_params.second, &leading_op_input);
   const int32_t leading_op_input_idx = subgraph->tensors.size();
   subgraph->tensors.push_back(std::move(leading_op_input));
 
@@ -423,9 +423,9 @@ void AddUint8Quant(
   const std::pair<float, int32_t>& provided_quant_params =
       quant_params.at(string(tensor->name));
   utils::MakeTensorWithQuantParam(
-      added_tensor_name, tensor->shape, TensorType_UINT8,
-      provided_quant_params.first, provided_quant_params.second,
-      &tailing_op_output);
+      added_tensor_name, tensor->shape, tensor->shape_signature,
+      TensorType_UINT8, provided_quant_params.first,
+      provided_quant_params.second, &tailing_op_output);
   const int32_t tailing_op_output_idx = subgraph->tensors.size();
   subgraph->tensors.push_back(std::move(tailing_op_output));
 
@@ -141,8 +141,8 @@ int32_t SetInputType(ModelT* model, SubGraphT* subgraph,
     const string leading_op_name = tensor->name;
     const string new_name_original_input = tensor->name + "_int8";
    tensor->name = new_name_original_input;
-    utils::MakeTensor(leading_op_name, tensor->shape, input_type,
-                      &leading_op_input);
+    utils::MakeTensor(leading_op_name, tensor->shape, tensor->shape_signature,
+                      input_type, &leading_op_input);
   } else {
     // Get scale and zero point from the first tensor.
     const float scale = subgraph->tensors[tensor_idx]->quantization->scale[0];
@@ -156,9 +156,9 @@ int32_t SetInputType(ModelT* model, SubGraphT* subgraph,
     const string leading_op_name = tensor->name;
     const string new_name_original_input = tensor->name + "_int8";
     tensor->name = new_name_original_input;
-    utils::MakeTensorWithQuantParam(leading_op_name, tensor->shape,
-                                    input_type, scale, zero_point + 128,
-                                    &leading_op_input);
+    utils::MakeTensorWithQuantParam(
+        leading_op_name, tensor->shape, tensor->shape_signature, input_type,
+        scale, zero_point + 128, &leading_op_input);
   }
   const int32_t leading_op_input_idx = subgraph->tensors.size();
   subgraph->tensors.push_back(std::move(leading_op_input));
@@ -193,8 +193,8 @@ int32_t SetOutputType(ModelT* model, SubGraphT* subgraph,
     const string tailing_op_name = tensor->name;
     const string new_name_original_output = tensor->name + "_int8";
     tensor->name = new_name_original_output;
-    utils::MakeTensor(tailing_op_name, tensor->shape, output_type,
-                      &tailing_op_output);
+    utils::MakeTensor(tailing_op_name, tensor->shape, tensor->shape_signature,
+                      output_type, &tailing_op_output);
   } else {
     // Get scale and zero point from the last tensor.
     const float scale = subgraph->tensors[tensor_idx]->quantization->scale[0];
@@ -208,9 +208,9 @@ int32_t SetOutputType(ModelT* model, SubGraphT* subgraph,
     const string tailing_op_name = tensor->name;
     const string new_name_original_output = tensor->name + "_int8";
     tensor->name = new_name_original_output;
-    utils::MakeTensorWithQuantParam(tailing_op_name, tensor->shape,
-                                    output_type, scale, zero_point + 128,
-                                    &tailing_op_output);
+    utils::MakeTensorWithQuantParam(
+        tailing_op_name, tensor->shape, tensor->shape_signature, output_type,
+        scale, zero_point + 128, &tailing_op_output);
   }
   const int32_t tailing_op_output_idx = subgraph->tensors.size();
   subgraph->tensors.push_back(std::move(tailing_op_output));
@@ -340,8 +340,9 @@ TfLiteStatus ApplyConstraints(ModelT* model,
         std::unique_ptr<TensorT> additional_tensor;
         const string requant_tensor_name = input_tensor->name + "_requantized";
         utils::MakeTensorWithQuantParam(
-            requant_tensor_name, input_tensor->shape, TensorType_INT8,
-            output_scale, output_zp, &additional_tensor);
+            requant_tensor_name, input_tensor->shape,
+            input_tensor->shape_signature, TensorType_INT8, output_scale,
+            output_zp, &additional_tensor);
         const int32_t additional_tensor_idx = subgraph->tensors.size();
         subgraph->tensors.push_back(std::move(additional_tensor));
 
@@ -545,7 +546,7 @@ TfLiteStatus QuantizeOpInput(
         // operation since the preceding op may require a float output.
         std::unique_ptr<TensorT> op_output;
         utils::MakeTensor(tensor->name + "_int8", tensor->shape,
-                          TensorType_INT8, &op_output);
+                          tensor->shape_signature, TensorType_INT8, &op_output);
         op_output->quantization = absl::make_unique<QuantizationParametersT>();
         op_output->quantization->min.push_back(tensor->quantization->min[0]);
         op_output->quantization->max.push_back(tensor->quantization->max[0]);
@@ -573,7 +574,7 @@ TfLiteStatus QuantizeOpInput(
         // since this op is not quantizable.
         std::unique_ptr<TensorT> op_output;
         utils::MakeTensor(tensor->name + "_float", tensor->shape,
-                          TensorType_FLOAT32, &op_output);
+                          tensor->shape_signature, TensorType_FLOAT32, &op_output);
         const int32_t dequant_op_output_idx = subgraph->tensors.size();
         subgraph->tensors.push_back(std::move(op_output));
         std::unique_ptr<OperatorT> dequant_op;
@@ -259,10 +259,14 @@ void MakeDequantizeOperator(ModelT* model, std::unique_ptr<OperatorT>* op,
 
 // Create a new TensorT object.
 void MakeTensor(const string& name, const std::vector<int32_t>& shape,
+                const std::vector<int32_t>& shape_signature,
                 std::unique_ptr<TensorT>* tensor) {
   TensorT* tensor_raw = new TensorT;
   tensor_raw->name = name;
   tensor_raw->shape = shape;
+  if (!shape_signature.empty()) {
+    tensor_raw->shape_signature = shape_signature;
+  }
 
   tensor->reset(tensor_raw);
 }
@@ -419,8 +423,8 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder,
     // Create a new tensor to be the output of the dequantize op.
     std::unique_ptr<TensorT> dequantize_output;
     const string dequant_name = tensor->name + "_dequantize";
-    utils::MakeTensor(dequant_name, tensor->shape, TensorType_FLOAT32,
-                      &dequantize_output);
+    utils::MakeTensor(dequant_name, tensor->shape, tensor->shape_signature,
+                      TensorType_FLOAT32, &dequantize_output);
     const int32_t dequantize_output_idx = subgraph->tensors.size();
     subgraph->tensors.push_back(std::move(dequantize_output));
 
@@ -503,8 +507,8 @@ TfLiteStatus QuantizeWeightsFloat16(flatbuffers::FlatBufferBuilder* builder,
     // Create a new tensor to be the output of the dequantize op.
     std::unique_ptr<TensorT> dequantize_output;
     const string dequant_name = tensor->name + "_dequantize";
-    utils::MakeTensor(dequant_name, tensor->shape, TensorType_FLOAT32,
-                      &dequantize_output);
+    utils::MakeTensor(dequant_name, tensor->shape, tensor->shape_signature,
+                      TensorType_FLOAT32, &dequantize_output);
     const int32_t dequantize_output_idx = subgraph->tensors.size();
     subgraph->tensors.push_back(std::move(dequantize_output));
 
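Note: the end-to-end effect of threading shape_signature through these helpers is observable from Python: a weight- or fully-quantized model keeps -1 for its unknown dimensions. A small check, assuming `quantized_model` from the earlier sketches:

    import tensorflow as tf

    interpreter = tf.lite.Interpreter(model_content=quantized_model)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    print(input_details[0]['shape'])            # default shape, e.g. [1, 33]
    print(input_details[0]['shape_signature'])  # keeps -1, e.g. [-1, 33]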