From 7d605fb0e27643622f1fba7b77ba5e19fd210e02 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Thu, 28 May 2020 15:15:50 -0700
Subject: [PATCH] Add `inference_input_type` and `inference_output_type` flags
 in TF 2.x TFLiteConverter (backward compatible with TF 1.x) to support
 integer (tf.int8, tf.uint8) input and output types in post training full
 integer quantized models.

PiperOrigin-RevId: 313668965
Change-Id: Iea684507f58651b34dada0285b00a82e80066aab
---
 tensorflow/lite/python/lite.py         |  93 +++------------
 tensorflow/lite/python/lite_v2_test.py | 156 +++----------------------
 2 files changed, 32 insertions(+), 217 deletions(-)

diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index af42c28172a..53814bb0c43 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -201,11 +201,6 @@ class QuantizationMode(object):
             self._representative_dataset is not None and
             self._smallest_supported_type() == constants.INT8)
 
-  def is_post_training_integer_quantize(self):
-    """Post training integer quantization."""
-    return (self.post_training_int8_no_float() or
-            self.post_training_int8_allow_float())
-
   def training_time_int8_allow_float(self):
     """Training-time int8 quantize, allow float fallback."""
     return (self._any_optimization_enabled() and
@@ -418,56 +413,7 @@ class TFLiteConverterBase(object):
 
 
 class TFLiteConverterBaseV2(TFLiteConverterBase):
-  """Converter subclass to share functionality between V2 converters.
-
-  Attributes:
-    allow_custom_ops: Boolean indicating whether to allow custom operations.
-      When False, any unknown operation is an error. When True, custom ops are
-      created for any op that is unknown. The developer needs to provide these
-      to the TensorFlow Lite runtime with a custom resolver. (default False)
-    optimizations: Experimental flag, subject to change. A list of optimizations
-      to apply when converting the model. E.g. `[Optimize.DEFAULT]`
-    representative_dataset: A representative dataset that can be used to
-      generate input and output samples for the model. The converter can use the
-      dataset to evaluate different optimizations. Note that this is an optional
-      attribute but it is necessary if INT8 is the only support builtin ops in
-      target ops.
-    target_spec: Experimental flag, subject to change. Specification of target
-      device.
-    inference_input_type: Data type of the input layer. Note that integer types
-      (tf.int8 and tf.uint8) are currently only supported for post training
-      integer quantization. (default tf.float32, must be in {tf.float32,
-      tf.int8, tf.uint8})
-    inference_output_type: Data type of the output layer. Note that integer
-      types (tf.int8 and tf.uint8) are currently only supported for post
-      training integer quantization. (default tf.float32, must be in
-      {tf.float32, tf.int8, tf.uint8})
-    experimental_new_converter: Experimental flag, subject to change. Enables
-      MLIR-based conversion instead of TOCO conversion.
-  """
-
-  def __init__(self):
-    """Constructor for TFLiteConverter."""
-    super(TFLiteConverterBaseV2, self).__init__()
-    self.inference_input_type = constants.FLOAT
-    self.inference_output_type = constants.FLOAT
-
-  def _validate_inference_input_output_types(self, quant_mode):
-    """Validate inference_input_type and inference_output_type flags."""
-    default_types = [constants.FLOAT, None]
-    # We only support integer types for post training integer quantization
-    # as we have statistical information to quantize the input and output.
-    if quant_mode.is_post_training_integer_quantize():
-      all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8]
-      if self.inference_input_type not in all_types or \
-          self.inference_output_type not in all_types:
-        all_types_names = ["tf." + t.name for t in all_types]
-        raise ValueError("The inference_input_type and inference_output_type "
-                         "must be in {}.".format(all_types_names))
-    elif self.inference_input_type not in default_types or \
-        self.inference_output_type not in default_types:
-      raise ValueError("The inference_input_type and inference_output_type "
-                       "must be tf.float32.")
+  """Converter subclass to share functionality between V2 converters."""
 
   def convert(self, graph_def, input_tensors, output_tensors):
     """Converts a TensorFlow GraphDef based on instance variables.
@@ -491,8 +437,6 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
     quant_mode = QuantizationMode(self.optimizations, self.target_spec,
                                   self.representative_dataset, graph_def)
 
-    self._validate_inference_input_output_types(quant_mode)
-
     if not self._is_unknown_shapes_allowed():
       # Checks dimensions in input tensor.
       for tensor in input_tensors:
@@ -535,9 +479,6 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
          "quantize_to_float16": True,
      })
 
-    # Converter requires that the inference_input_type flag is set to FLOAT
-    converter_kwargs.update({"inference_input_type": constants.FLOAT})
-
    if not self.experimental_new_converter:
      logging.warning(
          "Please consider switching to use new converter by setting "
@@ -557,11 +498,11 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
        **converter_kwargs)
 
    if quant_mode.post_training_int8_no_float():
-      result = self._calibrate_quantize_model(result, self.inference_input_type,
-                                              self.inference_output_type, False)
+      result = self._calibrate_quantize_model(result, constants.FLOAT,
+                                              constants.FLOAT, False)
    elif quant_mode.post_training_int8_allow_float():
-      result = self._calibrate_quantize_model(result, self.inference_input_type,
-                                              self.inference_output_type, True)
+      result = self._calibrate_quantize_model(result, constants.FLOAT,
+                                              constants.FLOAT, True)
 
    if self._experimental_sparsify_model:
      result = _mlir_sparsify(result)
@@ -817,9 +758,12 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
 
  Attributes:
    allow_custom_ops: Boolean indicating whether to allow custom operations.
-      When False, any unknown operation is an error. When True, custom ops are
-      created for any op that is unknown. The developer needs to provide these
-      to the TensorFlow Lite runtime with a custom resolver. (default False)
+      When false any unknown operation is an error. When true, custom ops are
+      created for any op that is unknown. The developer will need to provide
+      these to the TensorFlow Lite runtime with a custom resolver.
+      (default False)
+    target_spec: Experimental flag, subject to change. Specification of target
+      device.
    optimizations: Experimental flag, subject to change. A list of optimizations
      to apply when converting the model. E.g. `[Optimize.DEFAULT]`
    representative_dataset: A representative dataset that can be used to
@@ -827,19 +771,8 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
      dataset to evaluate different optimizations. Note that this is an optional
      attribute but it is necessary if INT8 is the only support builtin ops in
      target ops.
-    target_spec: Experimental flag, subject to change. Specification of target
-      device.
-    inference_input_type: Data type of the input layer. Note that integer types
-      (tf.int8 and tf.uint8) are currently only supported for post training
-      integer quantization. (default tf.float32, must be in {tf.float32,
-      tf.int8, tf.uint8})
-    inference_output_type: Data type of the output layer. Note that integer
-      types (tf.int8 and tf.uint8) are currently only supported for post
-      training integer quantization. (default tf.float32, must be in
-      {tf.float32, tf.int8, tf.uint8})
-    experimental_new_converter: Experimental flag, subject to change. Enables
-      MLIR-based conversion instead of TOCO conversion.
-
+    experimental_new_converter: Experimental flag, subject to change.
+      Enables MLIR-based conversion instead of TOCO conversion.
  Example usage:
 
  ```python
diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py
index fae55e99cd1..9af37df2975 100644
--- a/tensorflow/lite/python/lite_v2_test.py
+++ b/tensorflow/lite/python/lite_v2_test.py
@@ -71,27 +71,6 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
    actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
    self.assertEqual(expected_value.numpy(), actual_value)
 
-  @parameterized.named_parameters(
-      ('_INT8InputOutput', lite.constants.INT8),
-      ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8))
-  @test_util.run_v2_only
-  def testInvalidFloat(self, inference_input_output_type):
-    root = self._getSimpleVariableModel()
-    input_data = tf.constant(1., shape=[1])
-    concrete_func = root.f.get_concrete_function(input_data)
-
-    # Convert model.
-    converter = lite.TFLiteConverterV2.from_concrete_functions([concrete_func])
-    # We don't support integer types as we don't have statistical information
-    # to quantize (only supported for post training integer quantization).
-    with self.assertRaises(ValueError) as error:
-      converter.inference_input_type = inference_input_output_type
-      converter.inference_output_type = inference_input_output_type
-      converter.convert()
-    self.assertEqual(
-        'The inference_input_type and inference_output_type '
-        'must be tf.float32.', str(error.exception))
-
  @test_util.run_v2_only
  def testScalarInput(self):
    root = self._getSimpleVariableModel()
@@ -193,113 +172,39 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
    self.assertLess(len(quantized_tflite), len(float_tflite))
 
  @parameterized.named_parameters(
-      ('_INT8InputOutput', lite.constants.INT8),
-      ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8))
-  @test_util.run_v2_only
-  def testInvalidPostTrainingDynamicRangeQuantization(
-      self, inference_input_output_type):
-    func, _ = self._getCalibrationQuantizeModel()
-
-    # Convert float model.
-    converter = lite.TFLiteConverterV2.from_concrete_functions([func])
-    tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
-
-    # Convert quantized model.
-    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
-    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
-    # We don't support integer types as we don't have statistical information
-    # to quantize (only supported for post training integer quantization).
-    with self.assertRaises(ValueError) as error:
-      quantized_converter.inference_input_type = inference_input_output_type
-      quantized_converter.inference_output_type = inference_input_output_type
-      quantized_converter.convert()
-    self.assertEqual(
-        'The inference_input_type and inference_output_type '
-        'must be tf.float32.', str(error.exception))
-
-  @parameterized.named_parameters(
-      ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT),
-      ('_INT8InputOutput', lite.constants.INT8),
-      ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8))
-  def testPostTrainingIntegerAllowFloatQuantization(
-      self, inference_input_output_type):
+      ('EnableMlirQuantizer', True),  # enable mlir quantizer
+      ('DisableMlirQuantizer', False))  # disable mlir quantizer
+  def testCalibrateAndQuantizeBuiltinInt8(self, mlir_quantizer):
    func, calibration_gen = self._getCalibrationQuantizeModel()
 
    # Convert float model.
-    converter = lite.TFLiteConverterV2.from_concrete_functions([func])
-    tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
-
-    # Convert quantized model.
-    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
-    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
-    quantized_converter.representative_dataset = calibration_gen
-    quantized_converter.inference_input_type = inference_input_output_type
-    quantized_converter.inference_output_type = inference_input_output_type
-    quantized_tflite_model = quantized_converter.convert()
-    self.assertTrue(quantized_tflite_model)
-
-    interpreter = Interpreter(model_content=quantized_tflite_model)
-    interpreter.allocate_tensors()
-    input_details = interpreter.get_input_details()
-    self.assertLen(input_details, 1)
-    self.assertEqual(inference_input_output_type.as_numpy_dtype,
-                     input_details[0]['dtype'])
-    output_details = interpreter.get_output_details()
-    self.assertLen(output_details, 1)
-    self.assertEqual(inference_input_output_type.as_numpy_dtype,
-                     output_details[0]['dtype'])
-
-    # Ensure that the quantized tflite model is smaller.
-    self.assertLess(len(quantized_tflite_model), len(tflite_model))
-
-  @parameterized.named_parameters(
-      ('_DefaultFLOAT32InputOutput_UseTargetTypesFlag',
-       lite.constants.FLOAT, False),
-      ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT, True),
-      ('_INT8InputOutput', lite.constants.INT8, True),
-      ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8, True))
-  @test_util.run_v2_only
-  def testPostTrainingIntegerNoFloatQuantization(self,
-                                                 inference_input_output_type,
-                                                 use_target_ops_flag):
-    func, calibration_gen = self._getCalibrationQuantizeModel()
-
-    # Convert float model.
-    converter = lite.TFLiteConverterV2.from_concrete_functions([func])
-    tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
+    float_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
+    float_tflite = float_converter.convert()
+    self.assertTrue(float_tflite)
 
    # Convert model by specifying target spec (instead of optimizations), since
    # when targeting an integer only backend, quantization is mandatory.
    quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
-    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
+    quantized_converter.target_spec.supported_ops = [
+        lite.OpsSet.TFLITE_BUILTINS_INT8
+    ]
    quantized_converter.representative_dataset = calibration_gen
-    if use_target_ops_flag:
-      quantized_converter.target_spec.supported_ops = [
-          lite.OpsSet.TFLITE_BUILTINS_INT8
-      ]
-    else:
-      quantized_converter.target_spec.supported_types = [lite.constants.INT8]
-    quantized_converter.inference_input_type = inference_input_output_type
-    quantized_converter.inference_output_type = inference_input_output_type
-    quantized_tflite_model = quantized_converter.convert()
-    self.assertTrue(quantized_tflite_model)
+    quantized_converter._experimental_new_quantizer = mlir_quantizer
+    quantized_tflite = quantized_converter.convert()
+    self.assertTrue(quantized_tflite)
 
-    interpreter = Interpreter(model_content=quantized_tflite_model)
+    # The default input and output types should be float.
+    interpreter = Interpreter(model_content=quantized_tflite)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    self.assertLen(input_details, 1)
-    self.assertEqual(inference_input_output_type.as_numpy_dtype,
-                     input_details[0]['dtype'])
+    self.assertEqual(np.float32, input_details[0]['dtype'])
    output_details = interpreter.get_output_details()
    self.assertLen(output_details, 1)
-    self.assertEqual(inference_input_output_type.as_numpy_dtype,
-                     output_details[0]['dtype'])
+    self.assertEqual(np.float32, output_details[0]['dtype'])
 
-    # Ensure that the quantized tflite model is smaller.
-    self.assertLess(len(quantized_tflite_model), len(tflite_model))
+    # Ensure that the quantized weights tflite model is smaller.
+    self.assertLess(len(quantized_tflite), len(float_tflite))
 
  def testCalibrateAndQuantizeBuiltinInt16(self):
    func, calibration_gen = self._getCalibrationQuantizeModel()
@@ -374,7 +279,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
    return tf.keras.Sequential(QLinear(3, input_shape=(2,)))
 
  @test_util.run_v2_only
-  def testTrainingTimeQuantization(self):
+  def testTrainingTimeQuantizeConversion(self):
    model = self._getTrainingTimeQuantizedModel()
 
    float_converter = lite.TFLiteConverterV2.from_keras_model(model)
@@ -392,29 +297,6 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
    interpreter = Interpreter(model_content=quantized_tflite)
    self.assertEqual(np.float32, interpreter.get_input_details()[0]['dtype'])
 
-  @parameterized.named_parameters(
-      ('_INT8InputOutput', lite.constants.INT8),
-      ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8))
-  def testInvalidTrainingTimeQuantization(self, inference_input_output_type):
-    # We currently don't support integer inference_input_type and
-    # inference_output_type flags for training time quantization.
-
-    model = self._getTrainingTimeQuantizedModel()
-
-    converter = lite.TFLiteConverterV2.from_keras_model(model)
-    tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
-
-    quantized_converter = lite.TFLiteConverterV2.from_keras_model(model)
-    quantized_converter.optimizations = [lite.Optimize.DEFAULT]
-    with self.assertRaises(ValueError) as error:
-      quantized_converter.inference_input_type = inference_input_output_type
-      quantized_converter.inference_output_type = inference_input_output_type
-      quantized_converter.convert()
-    self.assertEqual(
-        'The inference_input_type and inference_output_type '
-        'must be tf.float32.', str(error.exception))
-
  @test_util.run_v2_only
  def testNewQuantizer(self):
    """Test the model quantized by the new converter."""
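For reference, the post-training full integer quantization flow exercised by `testCalibrateAndQuantizeBuiltinInt8` above looks roughly like the sketch below. The toy concrete function and `representative_dataset` generator are illustrative stand-ins for the test helper `_getCalibrationQuantizeModel()`; the converter attributes used (`target_spec.supported_ops`, `representative_dataset`, `optimizations`, `OpsSet.TFLITE_BUILTINS_INT8`) are taken from the diff, and the commented-out `inference_input_type` / `inference_output_type` lines apply only on TF versions that expose the flags named in the commit subject.

```python
import numpy as np
import tensorflow as tf


# Illustrative stand-in for the `_getCalibrationQuantizeModel()` helper used in
# lite_v2_test.py: a single conv op traced as a concrete function.
@tf.function(
    input_signature=[tf.TensorSpec(shape=[1, 5, 5, 3], dtype=tf.float32)])
def toy_model(x):
  kernel = tf.constant(np.random.uniform(size=(2, 2, 3, 1)), dtype=tf.float32)
  return tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME')


def representative_dataset():
  # A handful of float samples matching the input signature; the converter uses
  # them to calibrate activation ranges for full integer quantization.
  for _ in range(10):
    yield [np.random.uniform(size=(1, 5, 5, 3)).astype(np.float32)]


converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [toy_model.get_concrete_function()])
# Restrict conversion to the int8 builtin op set so quantization is mandatory,
# mirroring testCalibrateAndQuantizeBuiltinInt8 above; Optimize.DEFAULT is the
# more common, version-portable way to request the same post-training path.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.representative_dataset = representative_dataset
quantized_tflite_model = converter.convert()

# On TF versions that expose the flags named in the commit subject, the model's
# input/output tensors can additionally be made integer-typed:
#   converter.inference_input_type = tf.int8   # or tf.uint8
#   converter.inference_output_type = tf.int8  # or tf.uint8
```

With only the settings above, the interpreter still reports `np.float32` input and output dtypes, which is exactly what the rewritten test asserts.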