Fix some comments and specifications for int8 quantization.

This CL makes the following changes:
- Add int8 to all related argument comments.
- Disable grappler optimization when `inference_type` is int8.
- Use `inference_type` instead of `inference_input_type` to verify that quant stats are specified when post-training quantization is not used.

PiperOrigin-RevId: 285229735
Change-Id: Ie8da5c4d79fb60100c1041bd4573fe603cd304e6
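As background for the diff below: with this change, a caller that targets a quantized inference_type must pass per-input (mean, std_dev) stats unless post-training quantization supplies the ranges. A minimal sketch of the calling convention, modeled on the tests in this CL (the placeholder shape and the default_ranges_stats values are illustrative assumptions, not part of this change):

import tensorflow.compat.v1 as tf
from tensorflow.lite.python import convert
from tensorflow.lite.python import lite_constants

with tf.Graph().as_default(), tf.Session() as sess:
  in_tensor = tf.placeholder(dtype=tf.float32, shape=[1, 16, 16, 3])
  out_tensor = in_tensor + in_tensor
  tflite_model = convert.toco_convert(
      sess.graph_def, [in_tensor], [out_tensor],
      inference_type=lite_constants.QUANTIZED_UINT8,
      quantized_input_stats=[(128.0, 127.0)],  # one (mean, std_dev) per input
      default_ranges_stats=(0, 6))  # fallback (min, max) for unannotated ops
  # Omitting quantized_input_stats above now raises:
  #   ValueError: std_dev and mean must be defined when inference_type is
  #   QUANTIZED_UINT8 or INT8.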
@@ -255,10 +255,10 @@ def build_toco_convert_protos(input_tensors,
       `foo.shape` and `foo.dtype`.
     output_tensors: List of output tensors (only .name is used from this).
     inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8}`. (default tf.float32)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default tf.float32)
     inference_input_type: Target data type of real-number input arrays. Allows
       for a different type for input arrays in the case of quantization.
-      Must be `{tf.float32, tf.uint8}`. (default `inference_type`)
+      Must be `{tf.float32, tf.uint8, tf.int8}`. (default `inference_type`)
     input_format: Type of data to read Currently must be
       `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF)
     input_shapes: Input array shape. It needs to be a list of the same length
@@ -267,7 +267,7 @@ def build_toco_convert_protos(input_tensors,
       GRAPHVIZ_DOT}`. (default TFLITE)
     quantized_input_stats: List of tuples of floats representing the mean and
       standard deviation. Each tuple maps to the corresponding input tensor.
-      Only need if `inference_input_type` is `QUANTIZED_UINT8`.
+      Only need if `inference_input_type` is `QUANTIZED_UINT8` or `INT8`.
       real_input_value = (quantized_input_value - mean_value) / std_dev_value.
       (default None)
     default_ranges_stats: Tuple of integers representing (min, max) range values
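To make the quantized_input_stats formula above concrete, a quick numeric check, assuming a uint8 input that should dequantize to real values in [0.0, 1.0] (the range is an arbitrary example, not from this CL):

# real_input_value = (quantized_input_value - mean_value) / std_dev_value
# For real range [0.0, 1.0] over uint8 [0, 255]:
mean_value = 0.0       # quantized value that represents real 0.0
std_dev_value = 255.0  # 1 / scale, with scale = (1.0 - 0.0) / 255
assert (0 - mean_value) / std_dev_value == 0.0    # uint8 0   -> real 0.0
assert (255 - mean_value) / std_dev_value == 1.0  # uint8 255 -> real 1.0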
@@ -363,11 +363,10 @@ def build_toco_convert_protos(input_tensors,
     input_array.data_type = util.convert_dtype_to_tflite_type(
         input_tensor.dtype)

-    if toco.inference_input_type in \
-        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
-      if not quantized_input_stats:
-        raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+    if toco.inference_type in [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]:
+      if not quantized_input_stats and not post_training_quantize:
+        raise ValueError("std_dev and mean must be defined when inference_type "
+                         "is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = quantized_input_stats[idx]
     if input_shapes is None:
       shape = input_tensor.shape
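This hunk and the toco_convert_graph_def hunk below apply the same rule through different call paths. A hypothetical standalone restatement of the check (the function name and string-typed arguments are mine, for illustration only):

def quant_stats_missing(inference_type, quantized_input_stats,
                        post_training_quantize):
  # Raise-worthy: a quantized inference type with no input stats, and no
  # post-training quantization to derive ranges from the model itself.
  return (inference_type in ("QUANTIZED_UINT8", "INT8") and
          not quantized_input_stats and
          not post_training_quantize)

assert quant_stats_missing("QUANTIZED_UINT8", None, False)       # must raise
assert not quant_stats_missing("INT8", None, True)               # PTQ: ok
assert not quant_stats_missing("FLOAT", None, False)             # float: ok
assert not quant_stats_missing("INT8", [(128.0, 127.0)], False)  # stats: ok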
@@ -418,11 +417,13 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays,

   for idx, (name, shape) in enumerate(input_arrays_with_shape):
     input_array = model_flags.input_arrays.add()
-    if toco_flags.inference_input_type == _types_pb2.QUANTIZED_UINT8:
-      if (("quantized_input_stats" not in kwargs) or
-          (not kwargs["quantized_input_stats"])):
-        raise ValueError("std_dev and mean must be defined when "
-                         "inference_input_type is QUANTIZED_UINT8.")
+    if toco_flags.inference_type in (
+        [_types_pb2.QUANTIZED_UINT8, _types_pb2.INT8]):
+      if ((("quantized_input_stats" not in kwargs) or
+           (not kwargs["quantized_input_stats"])) and
+          not toco_flags.post_training_quantize):
+        raise ValueError("std_dev and mean must be defined when "
+                         "inference_type is QUANTIZED_UINT8 or INT8.")
       input_array.mean_value, input_array.std_value = kwargs[
           "quantized_input_stats"][idx]
     input_array.name = name
@@ -76,8 +76,17 @@ class ConvertTest(test_util.TensorFlowTestCase):
           sess.graph_def, [in_tensor], [out_tensor],
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))
+
+    with self.assertRaises(ValueError) as error:
+      convert.toco_convert(
+          sess.graph_def, [in_tensor], [out_tensor],
+          inference_type=lite_constants.QUANTIZED_UINT8,
+          inference_input_type=lite_constants.FLOAT)
+    self.assertEqual(
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))

   def testGraphDefBasic(self):
     with ops.Graph().as_default():
@@ -176,8 +185,8 @@ class ConvertTest(test_util.TensorFlowTestCase):
           enable_mlir_converter=False,
           inference_type=lite_constants.QUANTIZED_UINT8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_input_type is "
-        "QUANTIZED_UINT8.", str(error.exception))
+        "std_dev and mean must be defined when inference_type is "
+        "QUANTIZED_UINT8 or INT8.", str(error.exception))


 class ConvertTestOpHint(test_util.TensorFlowTestCase):
@@ -998,7 +998,12 @@ class TFLiteConverter(TFLiteConverterBase):
           "are not enabled.")

     optimized_graph = self._graph_def
-    if self.inference_type != constants.QUANTIZED_UINT8:
+    # if it is not uint8 or int8 with post-training quantization, it is not
+    # quantization aware training, then graph optimization is applied.
+    # Graph optimization is disabled for quantization aware training.
+    if (self.inference_type != constants.QUANTIZED_UINT8 or
+        (self.inference_type == constants.INT8 and
+         (post_training_optimize or weight_only_quantize))):
       try:
         optimized_graph = _run_graph_optimizations(
             self._graph_def,
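Restated as a hypothetical standalone predicate (the function name and string-typed argument are mine; post_training_optimize and weight_only_quantize are the locals visible in the hunk):

def should_run_grappler(inference_type, post_training_optimize,
                        weight_only_quantize):
  # Mirrors the committed condition: grappler is skipped only when
  # inference_type is QUANTIZED_UINT8 (quantization-aware training); the
  # second clause admits INT8 with post-training or weight-only quantization.
  return (inference_type != "QUANTIZED_UINT8" or
          (inference_type == "INT8" and
           (post_training_optimize or weight_only_quantize)))

assert not should_run_grappler("QUANTIZED_UINT8", False, False)  # QAT: skip
assert should_run_grappler("INT8", True, False)                  # PTQ: run
assert should_run_grappler("FLOAT", False, False)                # float: run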
@@ -787,6 +787,12 @@ class GrapplerTest(TestModels):
     actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])
     np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])

+    # Enable hybrid quantization, same result
+    converter.experimental_new_converter = True
+    converter.optimizations = [lite.Optimize.DEFAULT]
+    hybrid_tflite_model = converter.convert()
+    actual_value = self._evaluateTFLiteModel(hybrid_tflite_model, [input_data])
+    np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0])

 if __name__ == '__main__':
   test.main()
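The test above exercises hybrid (dynamic-range) quantization through the public converter API. A minimal sketch of the same setup outside the test harness, assuming a saved model at a hypothetical path:

import tensorflow as tf

# "/tmp/my_model" is a placeholder path, not from this CL.
converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/my_model")
converter.experimental_new_converter = True
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # weights quantized;
                                                      # compute stays float
hybrid_tflite_model = converter.convert()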