diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb
index 04e351785b0..83f3f964ab3 100644
--- a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb
+++ b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb
@@ -601,7 +601,7 @@
    "outputs": [],
    "source": [
     "config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n",
-    "config._experimental_new_quantizer = True"
+    "config.experimental_new_quantizer = True"
    ]
   },
   {
diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb
index 558f7afb418..8f984ab6907 100644
--- a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb
+++ b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb
@@ -476,7 +476,7 @@
    "outputs": [],
    "source": [
     "config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n",
-    "config._experimental_new_quantizer = True"
+    "config.experimental_new_quantizer = True"
    ]
   },
   {
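Note: with this change the Model Maker tutorials set a public attribute instead of a private one. A minimal sketch of the updated cell follows; the `tflite_model_maker` import path is an assumption based on how these tutorials are usually set up and does not appear in the hunks above.

```python
# Sketch only: the import is assumed from the surrounding notebook, not
# shown in this diff.
from tflite_model_maker import configs
import tensorflow as tf

config = configs.QuantizationConfig.create_dynamic_range_quantization(
    optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])
# Public attribute replaces the former private `_experimental_new_quantizer`.
config.experimental_new_quantizer = True
```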
diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py
index 27f8dc7ccc7..739d39f5bab 100644
--- a/tensorflow/lite/python/lite.py
+++ b/tensorflow/lite/python/lite.py
@@ -429,7 +429,8 @@ class TFLiteConverterBase(object):
     self.target_spec = TargetSpec()
     self.allow_custom_ops = False
     self.experimental_new_converter = True
-    self._experimental_new_quantizer = False
+    self.experimental_new_quantizer = False
+    self._experimental_new_quantizer = None
     self._experimental_calibrate_only = False
     self._experimental_sparsify_model = False
     self._debug_info = None  # contains the stack traces of all the original
@@ -474,13 +475,13 @@ class TFLiteConverterBase(object):
     # Add intermediate tensors to the model if needed.
     result = _calibrator.add_intermediate_tensors(result)
     calibrate_quantize = _calibrator.Calibrator(result)
-    if self._experimental_calibrate_only or self._experimental_new_quantizer:
+    if self._experimental_calibrate_only or self.experimental_new_quantizer:
       calibrated = calibrate_quantize.calibrate(
           self.representative_dataset.input_gen)

     if self._experimental_calibrate_only:
       return calibrated
-    elif self._experimental_new_quantizer and (
+    elif self.experimental_new_quantizer and (
         activations_type != _dtypes.int16):
       # TODO(b/175659372): remove the activations_type restriction and enable
       # it for all the activation types.
@@ -565,6 +566,10 @@ class TFLiteConverterBase(object):
   def _sparsify_model(self):
     return Optimize.EXPERIMENTAL_SPARSITY in self.optimizations

+  def _validate_experimental_new_quantizer_flag(self):
+    if self._experimental_new_quantizer is not None:
+      raise ValueError("Please use 'experimental_new_quantizer' instead.")
+

 class TFLiteConverterBaseV2(TFLiteConverterBase):
   """Converter subclass to share functionality between V2 converters."""
@@ -617,6 +622,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
                                  self.representative_dataset, graph_def)

     self._validate_inference_input_output_types(quant_mode)
+    self._validate_experimental_new_quantizer_flag()

     if not self._is_unknown_shapes_allowed():
       # Checks dimensions in input tensor.
@@ -1011,6 +1017,9 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
       to the TensorFlow Lite runtime with a custom resolver. (default False)
     experimental_new_converter: Experimental flag, subject to change.
       Enables MLIR-based conversion instead of TOCO conversion. (default True)
+    experimental_new_quantizer: Experimental flag, subject to change. Enables
+      MLIR-based quantization conversion instead of Flatbuffer-based conversion.
+      (default False)

   Example usage:
@@ -1340,6 +1349,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
     calibrate_quantize, flags = quant_mode.quantizer_flags()

     self._validate_quantized_input_stats(converter_kwargs, calibrate_quantize)
+    self._validate_experimental_new_quantizer_flag()

     # Converts model.
     if self._has_valid_tensors():
@@ -1358,7 +1368,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
     if calibrate_quantize:
       result = self._calibrate_quantize_model(result, **flags)

-    if self.experimental_new_converter or self._experimental_new_quantizer:
+    if self.experimental_new_converter or self.experimental_new_quantizer:
       flags_modify_model_io_type = quant_mode.flags_modify_model_io_type(
           self.inference_input_type, self.inference_output_type)
       if flags_modify_model_io_type:
@@ -1731,6 +1741,9 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
       set it to `{tf.lite.Optimize.DEFAULT}`. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)
+    experimental_new_quantizer: Experimental flag, subject to change. Enables
+      MLIR-based quantization conversion instead of Flatbuffer-based conversion.
+      (default False)

   Example usage:
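Note: after this rename, user code sets `experimental_new_quantizer` directly, and `convert()` now rejects the old private attribute via the new `_validate_experimental_new_quantizer_flag()` check. A minimal sketch of both paths; the toy `tf.function` model is illustrative and not part of this diff.

```python
import tensorflow as tf

# Illustrative toy model; any concrete function would do.
@tf.function(input_signature=[tf.TensorSpec(shape=[1, 4], dtype=tf.float32)])
def add_one(x):
  return x + 1.0

converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [add_one.get_concrete_function()])
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.experimental_new_quantizer = True  # new public flag
tflite_model = converter.convert()

# Setting the old private attribute now fails validation inside convert():
#   converter._experimental_new_quantizer = True
#   converter.convert()
#   # ValueError: Please use 'experimental_new_quantizer' instead.
```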
diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py
index 5bfd0a91a8d..8490e17ec07 100644
--- a/tensorflow/lite/python/lite_test.py
+++ b/tensorflow/lite/python/lite_test.py
@@ -367,7 +367,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):

     quantized_converter.inference_input_type = inference_input_output_type
     quantized_converter.inference_output_type = inference_input_output_type
-    quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
+    quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
     quantized_tflite_model = quantized_converter.convert()
     self.assertIsNotNone(quantized_tflite_model)
@@ -1163,7 +1163,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     quantized_converter = lite.TFLiteConverter.from_session(
         sess, [inp], [output])
     quantized_converter.experimental_new_converter = enable_mlir_converter
-    quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
+    quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
     quantized_converter.target_spec.supported_types = [dtypes.float16]
     if include_int8:
@@ -1310,7 +1310,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     # trigger post-training quantization
     converter.optimizations = [lite.Optimize.DEFAULT]
     converter.representative_dataset = calibration_gen
-    converter._experimental_new_quantizer = True
+    converter.experimental_new_quantizer = True
     quantized_tflite_model = converter.convert()
     self.assertIsNotNone(quantized_tflite_model)
     self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py
index 6d261c9b5ba..638365782f3 100644
--- a/tensorflow/lite/python/lite_v2_test.py
+++ b/tensorflow/lite/python/lite_v2_test.py
@@ -178,7 +178,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
     quantized_converter.representative_dataset = calibration_gen
-    quantized_converter._experimental_new_quantizer = mlir_quantizer
+    quantized_converter.experimental_new_quantizer = mlir_quantizer
     quantized_tflite_model = quantized_converter.convert()
     self.assertIsNotNone(quantized_tflite_model)
@@ -418,11 +418,11 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     quantized_converter.representative_dataset = calibration_gen

     # default quantizer
-    quantized_converter._experimental_new_quantizer = False
+    quantized_converter.experimental_new_quantizer = False
     old_tflite = quantized_converter.convert()

     # new quantizer
-    quantized_converter._experimental_new_quantizer = True
+    quantized_converter.experimental_new_quantizer = True
     new_tflite = quantized_converter.convert()

     for _ in range(5):
@@ -648,7 +648,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):

     quantized_converter.inference_input_type = inference_input_output_type
     quantized_converter.inference_output_type = inference_input_output_type
-    quantized_converter._experimental_new_quantizer = enable_mlir_quantizer
+    quantized_converter.experimental_new_quantizer = enable_mlir_quantizer
     quantized_tflite_model = quantized_converter.convert()
     self.assertIsNotNone(quantized_tflite_model)
@@ -681,7 +681,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):

     # Create a TFLite model with new quantizer.
     quantized_converter.optimizations = [lite.Optimize.DEFAULT]
-    quantized_converter._experimental_new_quantizer = True
+    quantized_converter.experimental_new_quantizer = True
     production_tflite = quantized_converter.convert()
     # Create a TFLite model with new quantizer and numeric verify ops.
     quantized_converter._experimental_calibrate_only = True
diff --git a/tensorflow/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/lite/testing/model_coverage/model_coverage_lib.py
index d3b8cc1ac97..f59c7c2a17a 100644
--- a/tensorflow/lite/testing/model_coverage/model_coverage_lib.py
+++ b/tensorflow/lite/testing/model_coverage/model_coverage_lib.py
@@ -148,7 +148,7 @@ def _convert(converter, **kwargs):
     converter.target_spec.supported_ops = [_lite.OpsSet.TFLITE_BUILTINS_INT8]
     converter.representative_dataset = _get_calib_data_func(input_size)
     # Note that the full integer quantization is by the mlir quantizer
-    converter._experimental_new_quantizer = True  # pylint: disable=protected-access
+    converter.experimental_new_quantizer = True
   if kwargs.get("post_training_quantize_16x8", False):
     input_size = kwargs.get("model_input_size")
     converter.optimizations = [_lite.Optimize.DEFAULT]
diff --git a/tensorflow/lite/testing/toco_convert.py b/tensorflow/lite/testing/toco_convert.py
index 5216c1febbe..a40d9e36544 100644
--- a/tensorflow/lite/testing/toco_convert.py
+++ b/tensorflow/lite/testing/toco_convert.py
@@ -115,7 +115,7 @@ def toco_convert(options, graph_def, input_tensors, output_tensors, **kwargs):
         graphdef_file.name, input_arrays, output_tensors, input_shapes)
     converter.experimental_new_converter = options.use_experimental_converter
-    converter._experimental_new_quantizer = options.mlir_quantizer  # pylint: disable=protected-access
+    converter.experimental_new_quantizer = options.mlir_quantizer
     converter.optimizations = [tf.lite.Optimize.DEFAULT]

     if fully_quantize:
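Note: the model-coverage and toco_convert harnesses now set the public flag as well. A hedged sketch of the full-integer quantization path that `model_coverage_lib._convert` exercises after this rename; the Keras model and calibration generator are placeholders standing in for the harness internals.

```python
import numpy as np
import tensorflow as tf

# Placeholder model and calibration data, for illustration only.
model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])

def calibration_gen():
  for _ in range(10):
    yield [np.random.uniform(size=(1, 4)).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.representative_dataset = calibration_gen
# Full integer quantization goes through the MLIR quantizer; the flag is
# now a public attribute, so no pylint protected-access suppression needed.
converter.experimental_new_quantizer = True
tflite_model = converter.convert()
```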