Make the experimental_new_quantizer flag public

PiperOrigin-RevId: 351421198
Change-Id: I90a1df2a7d6ad074cc7f0d1d04a005a16580fb4c
This commit is contained in:
Feng Liu 2021-01-12 12:10:05 -08:00 committed by TensorFlower Gardener
parent 3b5bb5a706
commit 201ac54c8d
7 changed files with 29 additions and 16 deletions

View File

@ -601,7 +601,7 @@
"outputs": [],
"source": [
"config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n",
"config._experimental_new_quantizer = True"
"config.experimental_new_quantizer = True"
]
},
{

View File

@ -476,7 +476,7 @@
"outputs": [],
"source": [
"config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n",
"config._experimental_new_quantizer = True"
"config.experimental_new_quantizer = True"
]
},
{

View File

@ -429,7 +429,8 @@ class TFLiteConverterBase(object):
self.target_spec = TargetSpec()
self.allow_custom_ops = False
self.experimental_new_converter = True
self._experimental_new_quantizer = False
self.experimental_new_quantizer = False
self._experimental_new_quantizer = None
self._experimental_calibrate_only = False
self._experimental_sparsify_model = False
self._debug_info = None # contains the stack traces of all the original
@ -474,13 +475,13 @@ class TFLiteConverterBase(object):
# Add intermediate tensors to the model if needed.
result = _calibrator.add_intermediate_tensors(result)
calibrate_quantize = _calibrator.Calibrator(result)
if self._experimental_calibrate_only or self._experimental_new_quantizer:
if self._experimental_calibrate_only or self.experimental_new_quantizer:
calibrated = calibrate_quantize.calibrate(
self.representative_dataset.input_gen)
if self._experimental_calibrate_only:
return calibrated
elif self._experimental_new_quantizer and (
elif self.experimental_new_quantizer and (
activations_type != _dtypes.int16):
# TODO(b/175659372): remove the activations_type restriction and enable
# it for all the activation types.
@ -565,6 +566,10 @@ class TFLiteConverterBase(object):
def _sparsify_model(self):
return Optimize.EXPERIMENTAL_SPARSITY in self.optimizations
def _validate_experimental_new_quantizer_flag(self):
if self._experimental_new_quantizer is not None:
raise ValueError("Please use 'experimental_new_quantizer' instead.")
class TFLiteConverterBaseV2(TFLiteConverterBase):
"""Converter subclass to share functionality between V2 converters."""
@ -617,6 +622,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
self.representative_dataset, graph_def)
self._validate_inference_input_output_types(quant_mode)
self._validate_experimental_new_quantizer_flag()
if not self._is_unknown_shapes_allowed():
# Checks dimensions in input tensor.
@ -1011,6 +1017,9 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
to the TensorFlow Lite runtime with a custom resolver. (default False)
experimental_new_converter: Experimental flag, subject to change. Enables
MLIR-based conversion instead of TOCO conversion. (default True)
experimental_new_quantizer: Experimental flag, subject to change. Enables
MLIR-based quantization conversion instead of Flatbuffer-based conversion.
(default False)
Example usage:
@ -1340,6 +1349,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
calibrate_quantize, flags = quant_mode.quantizer_flags()
self._validate_quantized_input_stats(converter_kwargs, calibrate_quantize)
self._validate_experimental_new_quantizer_flag()
# Converts model.
if self._has_valid_tensors():
@ -1358,7 +1368,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
if calibrate_quantize:
result = self._calibrate_quantize_model(result, **flags)
if self.experimental_new_converter or self._experimental_new_quantizer:
if self.experimental_new_converter or self.experimental_new_quantizer:
flags_modify_model_io_type = quant_mode.flags_modify_model_io_type(
self.inference_input_type, self.inference_output_type)
if flags_modify_model_io_type:
@ -1731,6 +1741,9 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
set it to `{tf.lite.Optimize.DEFAULT}`. (default False)
experimental_new_converter: Experimental flag, subject to change. Enables
MLIR-based conversion instead of TOCO conversion. (default True)
experimental_new_quantizer: Experimental flag, subject to change. Enables
MLIR-based quantization conversion instead of Flatbuffer-based conversion.
(default False)
Example usage:

View File

@ -367,7 +367,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
quantized_converter.inference_input_type = inference_input_output_type
quantized_converter.inference_output_type = inference_input_output_type
quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
quantized_tflite_model = quantized_converter.convert()
self.assertIsNotNone(quantized_tflite_model)
@ -1163,7 +1163,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
quantized_converter = lite.TFLiteConverter.from_session(
sess, [inp], [output])
quantized_converter.experimental_new_converter = enable_mlir_converter
quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
quantized_converter.optimizations = [lite.Optimize.DEFAULT]
quantized_converter.target_spec.supported_types = [dtypes.float16]
if include_int8:
@ -1310,7 +1310,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
# trigger post-training quantization
converter.optimizations = [lite.Optimize.DEFAULT]
converter.representative_dataset = calibration_gen
converter._experimental_new_quantizer = True
converter.experimental_new_quantizer = True
quantized_tflite_model = converter.convert()
self.assertIsNotNone(quantized_tflite_model)
self.assertLess(len(quantized_tflite_model), len(float_tflite_model))

View File

@ -178,7 +178,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
quantized_converter.optimizations = [lite.Optimize.DEFAULT]
quantized_converter.representative_dataset = calibration_gen
quantized_converter._experimental_new_quantizer = mlir_quantizer
quantized_converter.experimental_new_quantizer = mlir_quantizer
quantized_tflite_model = quantized_converter.convert()
self.assertIsNotNone(quantized_tflite_model)
@ -418,11 +418,11 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
quantized_converter.representative_dataset = calibration_gen
# default quantizer
quantized_converter._experimental_new_quantizer = False
quantized_converter.experimental_new_quantizer = False
old_tflite = quantized_converter.convert()
# new quantizer
quantized_converter._experimental_new_quantizer = True
quantized_converter.experimental_new_quantizer = True
new_tflite = quantized_converter.convert()
for _ in range(5):
@ -648,7 +648,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
quantized_converter.inference_input_type = inference_input_output_type
quantized_converter.inference_output_type = inference_input_output_type
quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
quantized_tflite_model = quantized_converter.convert()
self.assertIsNotNone(quantized_tflite_model)
@ -681,7 +681,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
# Create a TFLite model with new quantizer.
quantized_converter.optimizations = [lite.Optimize.DEFAULT]
quantized_converter._experimental_new_quantizer = True
quantized_converter.experimental_new_quantizer = True
production_tflite = quantized_converter.convert()
# Create a TFLite model with new quantizer and numeric verify ops.
quantized_converter._experimental_calibrate_only = True

View File

@ -148,7 +148,7 @@ def _convert(converter, **kwargs):
converter.target_spec.supported_ops = [_lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.representative_dataset = _get_calib_data_func(input_size)
# Note that the full integer quantization is by the mlir quantizer
converter._experimental_new_quantizer = True # pylint: disable=protected-access
converter.experimental_new_quantizer = True
if kwargs.get("post_training_quantize_16x8", False):
input_size = kwargs.get("model_input_size")
converter.optimizations = [_lite.Optimize.DEFAULT]

View File

@ -115,7 +115,7 @@ def toco_convert(options, graph_def, input_tensors, output_tensors, **kwargs):
graphdef_file.name, input_arrays, output_tensors, input_shapes)
converter.experimental_new_converter = options.use_experimental_converter
converter._experimental_new_quantizer = options.mlir_quantizer # pylint: disable=protected-access
converter.experimental_new_quantizer = options.mlir_quantizer
converter.optimizations = [tf.lite.Optimize.DEFAULT]
if fully_quantize: