From e775da774964f79f4e3605d4c6aa46823aa5ef7d Mon Sep 17 00:00:00 2001
From: Feng Liu
Date: Thu, 12 Dec 2019 11:13:33 -0800
Subject: [PATCH] Fix some comments and specifications for int8 quantization.

This CL makes the following changes:
- add int8 to all the related argument comments
- when "inference_type" is int8, grappler optimization is disabled
- use "inference_type" instead of "inference_input_type" to verify that
  quant stats are specified when it is not post-training quantization

PiperOrigin-RevId: 285229735
Change-Id: Ie8da5c4d79fb60100c1041bd4573fe603cd304e6
---
 tensorflow/lite/python/convert.py      | 25 +++++++++++++------------
 tensorflow/lite/python/convert_test.py | 17 +++++++++++++----
 tensorflow/lite/python/lite.py         |  7 ++++++-
 tensorflow/lite/python/lite_v2_test.py |  6 ++++++
 4 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py
index 78bfdc82e29..bac1cb6c720 100644
--- a/tensorflow/lite/python/convert.py
+++ b/tensorflow/lite/python/convert.py
@@ -255,10 +255,10 @@ def build_toco_convert_protos(input_tensors,
       `foo.shape` and `foo.dtype`.
     output_tensors: List of output tensors (only .name is used from this).
     inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8}`. (default tf.float32)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default tf.float32)
     inference_input_type: Target data type of real-number input arrays. Allows
       for a different type for input arrays in the case of quantization.
-      Must be `{tf.float32, tf.uint8}`. (default `inference_type`)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default `inference_type`)
     input_format: Type of data to read Currently must be
       `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF)
     input_shapes: Input array shape. It needs to be a list of the same length
@@ -267,7 +267,7 @@ def build_toco_convert_protos(input_tensors,
       GRAPHVIZ_DOT}`. (default TFLITE)
     quantized_input_stats: List of tuples of floats representing the mean and
       standard deviation. Each tuple maps to the corresponding input tensor.
-      Only need if `inference_input_type` is `QUANTIZED_UINT8`.
+      Only need if `inference_input_type` is `QUANTIZED_UINT8` or `INT8`.
       real_input_value = (quantized_input_value - mean_value) / std_dev_value.
       (default None)
     default_ranges_stats: Tuple of integers representing (min, max) range values
@@ -363,11 +363,10 @@ def build_toco_convert_protos(input_tensors,
     input_array.data_type = util.convert_dtype_to_tflite_type(
         input_tensor.dtype)
 
-    if toco.inference_input_type in \
-        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
-      if not quantized_input_stats:
-        raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+    if toco.inference_type in [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
+      if not quantized_input_stats and not post_training_quantize:
+        raise ValueError("std_dev and mean must be defined when inference_type "
+                         "is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = quantized_input_stats[idx]
     if input_shapes is None:
       shape = input_tensor.shape
@@ -418,11 +417,13 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays,
   for idx, (name, shape) in enumerate(input_arrays_with_shape):
     input_array = model_flags.input_arrays.add()
-    if toco_flags.inference_input_type == _types_pb2.QUANTIZED_UINT8:
-      if (("quantized_input_stats" not in kwargs) or
-          (not kwargs["quantized_input_stats"])):
+    if toco_flags.inference_type in (
+        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]):
+      if ((("quantized_input_stats" not in kwargs) or
+           (not kwargs["quantized_input_stats"])) and
+          not toco_flags.post_training_quantize):
         raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+                         "inference_type is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = kwargs[
           "quantized_input_stats"][idx]
     input_array.name = name
diff --git a/tensorflow/lite/python/convert_test.py b/tensorflow/lite/python/convert_test.py
index 543ddda0d7c..fcd3128cbb4 100644
--- a/tensorflow/lite/python/convert_test.py
+++ b/tensorflow/lite/python/convert_test.py
@@ -76,8 +76,17 @@ class ConvertTest(test_util.TensorFlowTestCase):
           sess.graph_def, [in_tensor], [out_tensor],
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))
+
+    with self.assertRaises(ValueError) as error:
+      convert.toco_convert(
+          sess.graph_def, [in_tensor], [out_tensor],
+          inference_type=lite_constants.QUANTIZED_UINT8,
+          inference_input_type=lite_constants.FLOAT)
+    self.assertEqual(
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))
 
   def testGraphDefBasic(self):
     with ops.Graph().as_default():
@@ -176,8 +185,8 @@ class ConvertTest(test_util.TensorFlowTestCase):
           enable_mlir_converter=False,
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))
 
 
 class ConvertTestOpHint(test_util.TensorFlowTestCase):
diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index 3ae456e107b..83e97f156eb 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -998,7 +998,12 @@ class TFLiteConverter(TFLiteConverterBase):
             "are not enabled.")
 
     optimized_graph = self._graph_def
-    if self.inference_type != constants.QUANTIZED_UINT8:
+    # If it is not uint8, or it is int8 with post-training quantization, the
+    # model is not quantization-aware trained, so graph optimization is
+    # applied. Graph optimization is disabled for quantization-aware training.
+    if (self.inference_type != constants.QUANTIZED_UINT8 or
+        (self.inference_type == constants.INT8 and
+         (post_training_optimize or weight_only_quantize))):
       try:
         optimized_graph = _run_graph_optimizations(
             self._graph_def,
diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py
index f3842ae4bdf..f4a6a4e6d19 100644
--- a/tensorflow/lite/python/lite_v2_test.py
+++ b/tensorflow/lite/python/lite_v2_test.py
@@ -787,6 +787,12 @@ class GrapplerTest(TestModels):
     actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
     np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])
 
+    # Enable hybrid quantization; expect the same result.
+    converter.experimental_new_converter = True
+    converter.optimizations = [lite.Optimize.DEFAULT]
+    hybrid_tflite_model = converter.convert()
+    actual_value = self._evaluateTFLiteModel(hybrid_tflite_model, [input_data])
+    np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])
 
 if __name__ == '__main__':
   test.main()
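
For reference, a minimal sketch (not part of the patch) of the validation
behavior this change enforces. It mirrors the graph setup used by the
quantization tests in convert_test.py; the `tf.compat.v1` scaffolding is an
assumption made to keep the snippet standalone, not code from the patch:

    import tensorflow.compat.v1 as tf

    from tensorflow.lite.python import convert
    from tensorflow.lite.python import lite_constants

    with tf.Graph().as_default():
      in_tensor = tf.placeholder(shape=[1, 16, 16, 3], dtype=tf.float32)
      # Fake-quant the output so the converter has min/max ranges for the op.
      out_tensor = tf.fake_quant_with_min_max_args(
          in_tensor + in_tensor, min=0., max=1.)
      sess = tf.Session()

    # The check now keys off `inference_type`, so a float
    # `inference_input_type` no longer bypasses it:
    try:
      convert.toco_convert(
          sess.graph_def, [in_tensor], [out_tensor],
          inference_type=lite_constants.QUANTIZED_UINT8,
          inference_input_type=lite_constants.FLOAT)
    except ValueError as e:
      print(e)  # std_dev and mean must be defined when inference_type is ...

    # One (mean, std_dev) tuple per input tensor satisfies the check.
    tflite_model = convert.toco_convert(
        sess.graph_def, [in_tensor], [out_tensor],
        inference_type=lite_constants.QUANTIZED_UINT8,
        quantized_input_stats=[(0., 1.)])

The design intent, per the commit message, is that a quantized model with
float inputs still needs input statistics; only post-training quantization,
which derives ranges itself, is exempt from the check.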