Fix some comments and specifications for int8 quantization.

This CL makes the following changes:
- Add int8 to all related argument comments.
- Disable grappler optimization when `inference_type` is int8.
- Use `inference_type` instead of `inference_input_type` to verify that quant stats are specified when post-training quantization is not used.

PiperOrigin-RevId: 285229735
Change-Id: Ie8da5c4d79fb60100c1041bd4573fe603cd304e6
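As background for the diff below: with this change, a caller that targets a quantized inference_type must pass per-input (mean, std_dev) stats unless post-training quantization supplies the ranges. A minimal sketch of the calling convention, modeled on the tests in this CL (the placeholder shape and the default_ranges_stats values are illustrative assumptions, not part of this change):

import tensorflow.compat.v1 as tf
from tensorflow.lite.python import convert
from tensorflow.lite.python import lite_constants

with tf.Graph().as_default(), tf.Session() as sess:
  in_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 16, 16, 3])
  out_tensor = in_tensor + in_tensor
  tflite_model = convert.toco_convert(
      sess.graph_def, [in_tensor], [out_tensor],
      inference_type=lite_constants.QUANTIZED_UINT8,
      quantized_input_stats=[(128.0, 127.0)],  # one (mean, std_dev) per input
      default_ranges_stats=(0, 6))  # fallback (min, max) for unannotated ops
  # Omitting quantized_input_stats above now raises:
  #   ValueError: std_dev and mean must be defined when inference_type is
  #   QUANTIZED_UINT8 or INT8.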
@@ -255,10 +255,10 @@ def build_toco_convert_protos(input_tensors,
       `foo.shape` and `foo.dtype`.
     output_tensors: List of output tensors (only .name is used from this).
     inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8}`. (default tf.float32)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default tf.float32)
     inference_input_type: Target data type of real-number input arrays. Allows
       for a different type for input arrays in the case of quantization.
-      Must be `{tf.float32, tf.uint8}`. (default `inference_type`)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default `inference_type`)
     input_format: Type of data to read Currently must be
       `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF)
     input_shapes: Input array shape. It needs to be a list of the same length
@@ -267,7 +267,7 @@ def build_toco_convert_protos(input_tensors,
       GRAPHVIZ_DOT}`. (default TFLITE)
     quantized_input_stats: List of tuples of floats representing the mean and
       standard deviation. Each tuple maps to the corresponding input tensor.
-      Only need if `inference_input_type` is `QUANTIZED_UINT8`.
+      Only need if `inference_input_type` is `QUANTIZED_UINT8` or `INT8`.
       real_input_value = (quantized_input_value - mean_value) / std_dev_value.
       (default None)
     default_ranges_stats: Tuple of integers representing (min, max) range values
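To make the quantized_input_stats formula above concrete, a quick numeric check, assuming a uint8 input that should dequantize to real values in [0.0, 1.0] (the range is an arbitrary example, not from this CL):

# real_input_value = (quantized_input_value - mean_value) / std_dev_value
# For real range [0.0, 1.0] over uint8 [0, 255]:
mean_value = 0.0       # quantized value that represents real 0.0
std_dev_value = 255.0  # 1 / scale, with scale = (1.0 - 0.0) / 255
assert (0 - mean_value) / std_dev_value == 0.0    # uint8 0   -> real 0.0
assert (255 - mean_value) / std_dev_value == 1.0  # uint8 255 -> real 1.0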
@@ -363,11 +363,10 @@ def build_toco_convert_protos(input_tensors,
     input_array.data_type = util.convert_dtype_to_tflite_type(
         input_tensor.dtype)

-    if toco.inference_input_type in \
-        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
-      if not quantized_input_stats:
-        raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+    if toco.inference_type in [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
+      if not quantized_input_stats and not post_training_quantize:
+        raise ValueError("std_dev and mean must be defined when inference_type "
+                         "is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = quantized_input_stats[idx]
     if input_shapes is None:
       shape = input_tensor.shape
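This hunk and the toco_convert_graph_def hunk below apply the same rule through different call paths. A hypothetical standalone restatement of the check (the function name and string-typed arguments are mine, for illustration only):

def quant_stats_missing(inference_type, quantized_input_stats,
                        post_training_quantize):
  # Raise-worthy: a quantized inference type with no input stats, and no
  # post-training quantization to derive ranges from the model itself.
  return (inference_type in ("QUANTIZED_UINT8", "INT8") and
          not quantized_input_stats and
          not post_training_quantize)

assert quant_stats_missing("QUANTIZED_UINT8", None, False)       # must raise
assert not quant_stats_missing("INT8", None, True)               # PTQ: ok
assert not quant_stats_missing("FLOAT", None, False)             # float: ok
assert not quant_stats_missing("INT8", [(128.0, 127.0)], False)  # stats: ok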
@@ -418,11 +417,13 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays,

   for idx, (name, shape) in enumerate(input_arrays_with_shape):
     input_array = model_flags.input_arrays.add()
-    if toco_flags.inference_input_type == _types_pb2.QUANTIZED_UINT8:
-      if (("quantized_input_stats" not in kwargs) or
-          (not kwargs["quantized_input_stats"])):
-        raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+    if toco_flags.inference_type in (
+        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]):
+      if ((("quantized_input_stats" not in kwargs) or
+           (not kwargs["quantized_input_stats"])) and
+          not toco_flags.post_training_quantize):
+        raise ValueError("std_dev and mean must be defined when "
+                         "inference_type is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = kwargs[
           "quantized_input_stats"][idx]
     input_array.name = name
@@ -76,8 +76,17 @@ class ConvertTest(test_util.TensorFlowTestCase):
           sess.graph_def, [in_tensor], [out_tensor],
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))
+
+    with self.assertRaises(ValueError) as error:
+      convert.toco_convert(
+          sess.graph_def, [in_tensor], [out_tensor],
+          inference_type=lite_constants.QUANTIZED_UINT8,
+          inference_input_type=lite_constants.FLOAT)
+    self.assertEqual(
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))

   def testGraphDefBasic(self):
     with ops.Graph().as_default():
@@ -176,8 +185,8 @@ class ConvertTest(test_util.TensorFlowTestCase):
           enable_mlir_converter=False,
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))


 class ConvertTestOpHint(test_util.TensorFlowTestCase):
@@ -998,7 +998,12 @@ class TFLiteConverter(TFLiteConverterBase):
           "are not enabled.")

     optimized_graph = self._graph_def
-    if self.inference_type != constants.QUANTIZED_UINT8:
+    # if it is not uint8 or int8 with post-training quantization, it is not
+    # quantization aware training, then graph optimization is applied.
+    # Graph optimization is disabled for quantization aware training.
+    if (self.inference_type != constants.QUANTIZED_UINT8 or
+        (self.inference_type == constants.INT8 and
+         (post_training_optimize or weight_only_quantize))):
       try:
         optimized_graph = _run_graph_optimizations(
             self._graph_def,
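Restated as a hypothetical standalone predicate (the function name and string-typed argument are mine; post_training_optimize and weight_only_quantize are the locals visible in the hunk):

def should_run_grappler(inference_type, post_training_optimize,
                        weight_only_quantize):
  # Mirrors the committed condition: grappler is skipped only when
  # inference_type is QUANTIZED_UINT8 (quantization-aware training); the
  # second clause admits INT8 with post-training or weight-only quantization.
  return (inference_type != "QUANTIZED_UINT8" or
          (inference_type == "INT8" and
           (post_training_optimize or weight_only_quantize)))

assert not should_run_grappler("QUANTIZED_UINT8", False, False)  # QAT: skip
assert should_run_grappler("INT8", True, False)                  # PTQ: run
assert should_run_grappler("FLOAT", False, False)                # float: run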
@@ -787,6 +787,12 @@ class GrapplerTest(TestModels):
     actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
     np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])

+    # Enable hybrid quantization, same result
+    converter.experimental_new_converter = True
+    converter.optimizations = [lite.Optimize.DEFAULT]
+    hybrid_tflite_model = converter.convert()
+    actual_value = self._evaluateTFLiteModel(hybrid_tflite_model, [input_data])
+    np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])

 if __name__ == '__main__':
   test.main()
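The test above exercises hybrid (dynamic-range) quantization through the public converter API. A minimal sketch of the same setup outside the test harness, assuming a saved model at a hypothetical path:

import tensorflow as tf

# "/tmp/my_model" is a placeholder path, not from this CL.
converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/my_model")
converter.experimental_new_converter = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # weights quantized;
                                                      # compute stays float
hybrid_tflite_model = converter.convert()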