Make the experimental_new_quantizer flag public

PiperOrigin-RevId: 351421198
Change-Id: I90a1df2a7d6ad074cc7f0d1d04a005a16580fb4c
Parent: 3b5bb5a706 · Commit: 201ac54c8d
Changed paths: tensorflow/lite/g3doc/tutorials, tensorflow/lite/python, tensorflow/lite/testing
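For orientation, a minimal sketch of the renamed flag in use (the SavedModel path below is a hypothetical placeholder, not part of this change):

import tensorflow as tf

# Hypothetical model location, for illustration only.
converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/saved_model")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Was the private `_experimental_new_quantizer` before this change.
converter.experimental_new_quantizer = True
tflite_model = converter.convert()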
@@ -601,7 +601,7 @@
     "outputs": [],
     "source": [
      "config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n",
-     "config._experimental_new_quantizer = True"
+     "config.experimental_new_quantizer = True"
     ]
    },
    {
@@ -476,7 +476,7 @@
     "outputs": [],
     "source": [
      "config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n",
-     "config._experimental_new_quantizer = True"
+     "config.experimental_new_quantizer = True"
     ]
    },
    {
@@ -429,7 +429,8 @@ class TFLiteConverterBase(object):
     self.target_spec = TargetSpec()
     self.allow_custom_ops = False
     self.experimental_new_converter = True
-    self._experimental_new_quantizer = False
+    self.experimental_new_quantizer = False
+    self._experimental_new_quantizer = None
     self._experimental_calibrate_only = False
     self._experimental_sparsify_model = False
     self._debug_info = None  # contains the stack traces of all the original
@@ -474,13 +475,13 @@ class TFLiteConverterBase(object):
     # Add intermediate tensors to the model if needed.
     result = _calibrator.add_intermediate_tensors(result)
     calibrate_quantize = _calibrator.Calibrator(result)
-    if self._experimental_calibrate_only or self._experimental_new_quantizer:
+    if self._experimental_calibrate_only or self.experimental_new_quantizer:
       calibrated = calibrate_quantize.calibrate(
           self.representative_dataset.input_gen)

     if self._experimental_calibrate_only:
       return calibrated
-    elif self._experimental_new_quantizer and (
+    elif self.experimental_new_quantizer and (
         activations_type != _dtypes.int16):
       # TODO(b/175659372): remove the activations_type restriction and enable
       # it for all the activation types.
@@ -565,6 +566,10 @@ class TFLiteConverterBase(object):
   def _sparsify_model(self):
     return Optimize.EXPERIMENTAL_SPARSITY in self.optimizations

+  def _validate_experimental_new_quantizer_flag(self):
+    if self._experimental_new_quantizer is not None:
+      raise ValueError("Please use 'experimental_new_quantizer' instead.")
+

 class TFLiteConverterBaseV2(TFLiteConverterBase):
   """Converter subclass to share functionality between V2 converters."""
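Given the validator added above (and the private attribute now defaulting to None), code that still assigns the old private name fails at conversion time; a sketch of the expected behavior, with converter setup elided:

converter._experimental_new_quantizer = True  # old private spelling, now rejected
converter.convert()
# ValueError: Please use 'experimental_new_quantizer' instead.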
@@ -617,6 +622,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
         self.representative_dataset, graph_def)

     self._validate_inference_input_output_types(quant_mode)
+    self._validate_experimental_new_quantizer_flag()

     if not self._is_unknown_shapes_allowed():
       # Checks dimensions in input tensor.
@@ -1011,6 +1017,9 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
       to the TensorFlow Lite runtime with a custom resolver. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)
+    experimental_new_quantizer: Experimental flag, subject to change. Enables
+      MLIR-based quantization conversion instead of Flatbuffer-based conversion.
+      (default False)

   Example usage:

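The newly documented attribute slots into the usual post-training quantization flow; a minimal sketch assuming a toy Keras model and a synthetic calibration generator (both illustrative, not from this commit):

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])

def representative_dataset():
  # Synthetic calibration data; real uses iterate over sample inputs.
  for _ in range(10):
    yield [np.random.rand(1, 8).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.experimental_new_quantizer = True  # opt in to the MLIR-based quantizer
tflite_model = converter.convert()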
@@ -1340,6 +1349,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
     calibrate_quantize, flags = quant_mode.quantizer_flags()

     self._validate_quantized_input_stats(converter_kwargs, calibrate_quantize)
+    self._validate_experimental_new_quantizer_flag()

     # Converts model.
     if self._has_valid_tensors():
@@ -1358,7 +1368,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
     if calibrate_quantize:
       result = self._calibrate_quantize_model(result, **flags)

-    if self.experimental_new_converter or self._experimental_new_quantizer:
+    if self.experimental_new_converter or self.experimental_new_quantizer:
       flags_modify_model_io_type = quant_mode.flags_modify_model_io_type(
           self.inference_input_type, self.inference_output_type)
       if flags_modify_model_io_type:
@@ -1731,6 +1741,9 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
       set it to `{tf.lite.Optimize.DEFAULT}`. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)
+    experimental_new_quantizer: Experimental flag, subject to change. Enables
+      MLIR-based quantization conversion instead of Flatbuffer-based conversion.
+      (default False)

   Example usage:

@@ -367,7 +367,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):

     quantized_converter.inference_input_type = inference_input_output_type
     quantized_converter.inference_output_type = inference_input_output_type
-    quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
+    quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
     quantized_tflite_model = quantized_converter.convert()
     self.assertIsNotNone(quantized_tflite_model)

@@ -1163,7 +1163,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     quantized_converter = lite.TFLiteConverter.from_session(
         sess, [inp], [output])
     quantized_converter.experimental_new_converter = enable_mlir_converter
-    quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
+    quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
     quantized_converter.target_spec.supported_types = [dtypes.float16]
     if include_int8:
@@ -1310,7 +1310,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     # trigger post-training quantization
     converter.optimizations = [lite.Optimize.DEFAULT]
     converter.representative_dataset = calibration_gen
-    converter._experimental_new_quantizer = True
+    converter.experimental_new_quantizer = True
     quantized_tflite_model = converter.convert()
     self.assertIsNotNone(quantized_tflite_model)
     self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
@@ -178,7 +178,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
     quantized_converter.representative_dataset = calibration_gen
-    quantized_converter._experimental_new_quantizer = mlir_quantizer
+    quantized_converter.experimental_new_quantizer = mlir_quantizer
     quantized_tflite_model = quantized_converter.convert()
     self.assertIsNotNone(quantized_tflite_model)

@@ -418,11 +418,11 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     quantized_converter.representative_dataset = calibration_gen

     # default quantizer
-    quantized_converter._experimental_new_quantizer = False
+    quantized_converter.experimental_new_quantizer = False
     old_tflite = quantized_converter.convert()

     # new quantizer
-    quantized_converter._experimental_new_quantizer = True
+    quantized_converter.experimental_new_quantizer = True
     new_tflite = quantized_converter.convert()

     for _ in range(5):
@@ -648,7 +648,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):

     quantized_converter.inference_input_type = inference_input_output_type
     quantized_converter.inference_output_type = inference_input_output_type
-    quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
+    quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
     quantized_tflite_model = quantized_converter.convert()
     self.assertIsNotNone(quantized_tflite_model)

@@ -681,7 +681,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):

     # Create a TFLite model with new quantizer.
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
-    quantized_converter._experimental_new_quantizer = True
+    quantized_converter.experimental_new_quantizer = True
     production_tflite = quantized_converter.convert()
     # Create a TFLite model with new quantizer and numeric verify ops.
     quantized_converter._experimental_calibrate_only = True
@@ -148,7 +148,7 @@ def _convert(converter, **kwargs):
     converter.target_spec.supported_ops = [_lite.OpsSet.TFLITE_BUILTINS_INT8]
     converter.representative_dataset = _get_calib_data_func(input_size)
     # Note that the full integer quantization is by the mlir quantizer
-    converter._experimental_new_quantizer = True  # pylint: disable=protected-access
+    converter.experimental_new_quantizer = True
   if kwargs.get("post_training_quantize_16x8", False):
     input_size = kwargs.get("model_input_size")
     converter.optimizations = [_lite.Optimize.DEFAULT]
@@ -115,7 +115,7 @@ def toco_convert(options, graph_def, input_tensors, output_tensors, **kwargs):
       graphdef_file.name, input_arrays, output_tensors, input_shapes)

   converter.experimental_new_converter = options.use_experimental_converter
-  converter._experimental_new_quantizer = options.mlir_quantizer  # pylint: disable=protected-access
+  converter.experimental_new_quantizer = options.mlir_quantizer
   converter.optimizations = [tf.lite.Optimize.DEFAULT]

   if fully_quantize: