Fix some comments and specifications for int8 quantization.

This CL makes the following changes:
- add int8 to all the related argument comments
- disable grappler optimization when "inference_type" is int8
- use "inference_type" instead of "inference_input_type" to verify that quantization stats are specified when post-training quantization is not used

PiperOrigin-RevId: 285229735
Change-Id: Ie8da5c4d79fb60100c1041bd4573fe603cd304e6
Feng Liu, 2019-12-12 11:13:33 -08:00, committed by TensorFlower Gardener
parent 2844809b9e
commit e775da7749
4 changed files with 38 additions and 17 deletions
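
For context, the settings these changes validate surface through the public converter API of this era roughly as follows. This is a hedged sketch: the file name, tensor names, and stats values are illustrative, not taken from this CL.

import tensorflow.compat.v1 as tf

# Hypothetical frozen graph and tensor names, for illustration only.
converter = tf.lite.TFLiteConverter.from_frozen_graph(
    "model.pb", input_arrays=["input"], output_arrays=["output"])
converter.inference_type = tf.uint8  # tf.int8 is also accepted after this CL
# One (mean, std_dev) pair per input tensor. Required here because no
# post-training quantization is requested; omitting it now raises ValueError.
converter.quantized_input_stats = {"input": (127.5, 127.5)}
tflite_model = converter.convert()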

tensorflow/lite/python/convert.py

@@ -255,10 +255,10 @@ def build_toco_convert_protos(input_tensors,
       `foo.shape` and `foo.dtype`.
     output_tensors: List of output tensors (only .name is used from this).
     inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8}`. (default tf.float32)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default tf.float32)
     inference_input_type: Target data type of real-number input arrays. Allows
       for a different type for input arrays in the case of quantization.
-      Must be `{tf.float32, tf.uint8}`. (default `inference_type`)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default `inference_type`)
     input_format: Type of data to read Currently must be
       `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF)
     input_shapes: Input array shape. It needs to be a list of the same length
@@ -267,7 +267,7 @@ def build_toco_convert_protos(input_tensors,
       GRAPHVIZ_DOT}`. (default TFLITE)
     quantized_input_stats: List of tuples of floats representing the mean and
       standard deviation. Each tuple maps to the corresponding input tensor.
-      Only need if `inference_input_type` is `QUANTIZED_UINT8`.
+      Only need if `inference_input_type` is `QUANTIZED_UINT8` or `INT8`.
       real_input_value = (quantized_input_value - mean_value) / std_dev_value.
       (default None)
     default_ranges_stats: Tuple of integers representing (min, max) range values
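
A worked instance of the docstring formula above, for inputs normalized to [-1, 1] (these mean/std values are the conventional choice for that range, not taken from this CL):

# real_input_value = (quantized_input_value - mean_value) / std_dev_value
mean_value, std_dev_value = 127.5, 127.5
assert (0 - mean_value) / std_dev_value == -1.0    # uint8 0   -> real -1.0
assert (255 - mean_value) / std_dev_value == 1.0   # uint8 255 -> real  1.0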
@@ -363,11 +363,10 @@ def build_toco_convert_protos(input_tensors,
     input_array.data_type = util.convert_dtype_to_tflite_type(
         input_tensor.dtype)

-    if toco.inference_input_type in \
-        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
-      if not quantized_input_stats:
-        raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+    if toco.inference_type in [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
+      if not quantized_input_stats and not post_training_quantize:
+        raise ValueError("std_dev and mean must be defined when inference_type "
+                         "is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = quantized_input_stats[idx]
     if input_shapes is None:
       shape = input_tensor.shape
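
The rule the hunk above introduces, restated as a compact predicate (a sketch, not the TF source; the enum values are spelled as strings here):

# Stats are mandatory only for quantized inference types, and only when
# post-training quantization will not derive the ranges itself.
def needs_quant_stats(inference_type, quantized_input_stats,
                      post_training_quantize):
  return (inference_type in ("QUANTIZED_UINT8", "INT8") and
          not quantized_input_stats and not post_training_quantize)

assert needs_quant_stats("INT8", None, False)        # -> raises ValueError
assert not needs_quant_stats("INT8", None, True)     # post-training: allowed
assert not needs_quant_stats("FLOAT", None, False)   # float path: no stats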
@@ -418,11 +417,13 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays,
   for idx, (name, shape) in enumerate(input_arrays_with_shape):
     input_array = model_flags.input_arrays.add()

-    if toco_flags.inference_input_type == _types_pb2.QUANTIZED_UINT8:
-      if (("quantized_input_stats" not in kwargs) or
-          (not kwargs["quantized_input_stats"])):
+    if toco_flags.inference_type in (
+        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]):
+      if ((("quantized_input_stats" not in kwargs) or
+           (not kwargs["quantized_input_stats"])) and
+          not toco_flags.post_training_quantize):
         raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+                         "inference_type is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = kwargs[
           "quantized_input_stats"][idx]
     input_array.name = name
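
The graph-def path applies the same rule but reads the stats from kwargs. A hedged call sketch (the argument list is inferred from the hunk header and the tests below; the graph and tensor names are illustrative):

from tensorflow.lite.python import convert, lite_constants

def convert_quantized(graph_def):
  # Omitting quantized_input_stats here, without post_training_quantize,
  # would now raise the ValueError above.
  return convert.toco_convert_graph_def(
      graph_def,
      input_arrays_with_shape=[("input", [1, 224, 224, 3])],
      output_arrays=["output"],
      enable_mlir_converter=False,
      inference_type=lite_constants.QUANTIZED_UINT8,
      quantized_input_stats=[(127.5, 127.5)])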

tensorflow/lite/python/convert_test.py

@@ -76,8 +76,17 @@ class ConvertTest(test_util.TensorFlowTestCase):
           sess.graph_def, [in_tensor], [out_tensor],
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))
+
+    with self.assertRaises(ValueError) as error:
+      convert.toco_convert(
+          sess.graph_def, [in_tensor], [out_tensor],
+          inference_type=lite_constants.QUANTIZED_UINT8,
+          inference_input_type=lite_constants.FLOAT)
+    self.assertEqual(
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))

   def testGraphDefBasic(self):
     with ops.Graph().as_default():
@@ -176,8 +185,8 @@ class ConvertTest(test_util.TensorFlowTestCase):
         enable_mlir_converter=False,
         inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))


 class ConvertTestOpHint(test_util.TensorFlowTestCase):

tensorflow/lite/python/lite.py

@@ -998,7 +998,12 @@ class TFLiteConverter(TFLiteConverterBase):
           "are not enabled.")

     optimized_graph = self._graph_def
-    if self.inference_type != constants.QUANTIZED_UINT8:
+    # if it is not uint8 or int8 with post-training quantization, it is not
+    # quantization aware training, then graph optimization is applied.
+    # Graph optimization is disabled for quantization aware training.
+    if (self.inference_type != constants.QUANTIZED_UINT8 or
+        (self.inference_type == constants.INT8 and
+         (post_training_optimize or weight_only_quantize))):
       try:
         optimized_graph = _run_graph_optimizations(
             self._graph_def,
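
Restating the new gate as a standalone predicate makes its behavior easier to check (a paraphrase, not the TF source). Note that the first disjunct is already true whenever inference_type is INT8, so as written only QUANTIZED_UINT8 actually skips grappler:

# Grappler runs when this returns True.
def runs_grappler(inference_type, post_training_optimize, weight_only_quantize):
  return (inference_type != "QUANTIZED_UINT8" or
          (inference_type == "INT8" and
           (post_training_optimize or weight_only_quantize)))

assert not runs_grappler("QUANTIZED_UINT8", False, False)  # uint8 QAT: skipped
assert runs_grappler("INT8", True, False)                  # int8 post-training
assert runs_grappler("INT8", False, False)  # int8 QAT: still runs as written
assert runs_grappler("FLOAT", False, False)                # float: always runs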

tensorflow/lite/python/lite_test.py

@@ -787,6 +787,12 @@ class GrapplerTest(TestModels):
     actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
     np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])

+    # Enable hybrid quantization, same result
+    converter.experimental_new_converter = True
+    converter.optimizations = [lite.Optimize.DEFAULT]
+    hybrid_tflite_model = converter.convert()
+    actual_value = self._evaluateTFLiteModel(hybrid_tflite_model, [input_data])
+    np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])


 if __name__ == '__main__':
   test.main()
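
"Hybrid" in the added test means post-training weight quantization: Optimize.DEFAULT stores weights as int8 while activations stay float32, so outputs should match the float model to within rounding, which is what the repeated assertion checks. A standalone sketch of the same flow (the model is illustrative, not taken from this test):

import tensorflow as tf

# Hypothetical tiny model; weight-only ("hybrid") post-training quantization.
model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
hybrid_model = converter.convert()  # int8 weights, float32 compute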