Update descriptions and error messages in the TFLite Converter code

PiperOrigin-RevId: 350175211
Change-Id: Ib5c66c37a307da74a3fa96a7cab8f0f3d23e7138
Meghna Natraj 2021-01-05 10:54:37 -08:00 committed by TensorFlower Gardener
parent 390e2deb75
commit 2c687ec4d9
5 changed files with 178 additions and 180 deletions

View File

@@ -110,7 +110,7 @@ class OpsSet(enum.Enum):
       "EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8"

   def __str__(self):
-    return self.value
+    return str(self.value)

   @staticmethod
   def get_options():
@@ -394,22 +394,22 @@ def build_toco_convert_protos(input_tensors,
     input_tensors: List of input tensors. Type and shape are computed using
       `foo.shape` and `foo.dtype`.
     output_tensors: List of output tensors (only .name is used from this).
-    inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8, tf.int8}`. (default tf.float32)
-    inference_input_type: Target data type of real-number input arrays. Allows
-      for a different type for input arrays in the case of quantization. Must be
-      `{tf.float32, tf.uint8, tf.int8}`. (default `inference_type`)
-    input_format: Type of data to read Currently must be
-      `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF)
-    input_shapes: Input array shape. It needs to be a list of the same length as
-      `input_tensors`, or None. (default None)
-    output_format: Output file format. Currently must be `{TFLITE,
-      GRAPHVIZ_DOT}`. (default TFLITE)
-    quantized_input_stats: List of tuples of floats representing the mean and
-      standard deviation. Each tuple maps to the corresponding input tensor.
-      Only need if `inference_input_type` is `QUANTIZED_UINT8` or `INT8`.
-      real_input_value = (quantized_input_value - mean_value) / std_dev_value.
-      (default None)
+    inference_type: Data type of numeric arrays, excluding the input layer.
+      (default tf.float32, must be in {tf.float32, tf.int8, tf.uint8})
+    inference_input_type: Data type of the numeric arrays in the input layer. If
+      `inference_input_type` is in {tf.int8, tf.uint8}, then
+      `quantized_input_stats` must be provided. (default is the value assigned
+      to `inference_type`, must be in {tf.float32, tf.int8, tf.uint8})
+    input_format: Type of data to read.
+      (default TENSORFLOW_GRAPHDEF, must be in {TENSORFLOW_GRAPHDEF})
+    input_shapes: Input array shape. (default None, must be None or a list of
+      the same length as `input_tensors`.)
+    output_format: Output file format. (default TFLITE, must be in
+      {TFLITE, GRAPHVIZ_DOT})
+    quantized_input_stats: Map of input tensor names to a tuple of floats
+      representing the mean and standard deviation of the training data.
+      (e.g., {"foo" : (0., 1.)}). Required if `inference_input_type` is tf.int8
+      or tf.uint8. (default None)
     default_ranges_stats: Tuple of integers representing (min, max) range values
       for all arrays without a specified range. Intended for experimenting with
       quantization via "dummy quantization". (default None)
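
(Editorial note, not part of the diff: the dequantization formula that the old docstring spelled out is still the easiest way to sanity-check `quantized_input_stats`. A minimal, hypothetical sketch; the helper name and example stats are illustrative, not TFLite API.)

```python
# Hypothetical helper illustrating how the converter interprets the per-input
# (mean, std) pair in `quantized_input_stats`:
#   real_input_value = (quantized_input_value - mean_value) / std_dev_value
def dequantize_input(quantized_value, stats):
  mean_value, std_dev_value = stats  # e.g. {"foo": (127.5, 127.5)} per input name
  return (quantized_value - mean_value) / std_dev_value

print(dequantize_input(255, (127.5, 127.5)))  # -> 1.0
```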
@@ -574,8 +574,10 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays,
       if _requires_input_stats(toco_flags):
         if (("quantized_input_stats" not in kwargs) or
             (not kwargs["quantized_input_stats"])):
-          raise ValueError("std_dev and mean must be defined when inference_type "
-                           "or inference_input_type is QUANTIZED_UINT8 or INT8.")
+          raise ValueError(
+              "The `quantized_input_stats` flag must be defined when either "
+              "`inference_type` flag or `inference_input_type` flag is set to "
+              "tf.int8 or tf.uint8.")
         input_array.mean_value, input_array.std_value = kwargs[
             "quantized_input_stats"][idx]
       input_array.name = name
@@ -661,7 +663,7 @@ def toco_convert(input_data, input_tensors, output_tensors, *args, **kwargs):
   Typically this function is used to convert from TensorFlow GraphDef to TFLite.
   Conversion can be customized by providing arguments that are forwarded to
   `build_toco_convert_protos` (see documentation for details). This function has
-  been deprecated. Please use `lite.TFLiteConverter` instead.
+  been deprecated. Please use `tf.lite.TFLiteConverter` instead.

   Args:
     input_data: Input data (i.e. often `sess.graph_def`),

View File

@@ -137,7 +137,7 @@ class ConvertTest(test_util.TensorFlowTestCase):
     self.assertEqual("output", output_details[0]["name"])
     self.assertEqual(np.uint8, output_details[0]["dtype"])
     self.assertTrue(([1, 16, 16, 3] == output_details[0]["shape"]).all())
-    self.assertTrue(output_details[0]["quantization"][0] > 0)  # scale
+    self.assertGreater(output_details[0]["quantization"][0], 0)  # scale

   def testGraphDefQuantizationInvalid(self):
     with ops.Graph().as_default():
@@ -159,9 +159,9 @@ class ConvertTest(test_util.TensorFlowTestCase):
           enable_mlir_converter=False,
           inference_type=dtypes.uint8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_type or "
-        "inference_input_type is QUANTIZED_UINT8 or INT8.",
-        str(error.exception))
+        "The `quantized_input_stats` flag must be defined when either "
+        "`inference_type` flag or `inference_input_type` flag is set to "
+        "tf.int8 or tf.uint8.", str(error.exception))


 class ConvertTestOpHint(test_util.TensorFlowTestCase):

View File

@@ -61,6 +61,7 @@ from tensorflow.lite.python.util import get_debug_info as _get_debug_info
 from tensorflow.lite.python.util import get_grappler_config as _get_grappler_config
 from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name
 from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names
+from tensorflow.lite.python.util import get_tf_type_name as _get_tf_type_name
 from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph
 from tensorflow.lite.python.util import model_input_signature as _model_input_signature
 from tensorflow.lite.python.util import modify_model_io_type as _modify_model_io_type
@@ -89,19 +90,14 @@ from tensorflow.python.util.tf_export import tf_export as _tf_export

 @_tf_export("lite.Optimize")
 class Optimize(enum.Enum):
-  """Enum defining the optimizations to apply when generating tflite graphs.
-
-  Some optimizations may come at the cost of accuracy.
+  """Enum defining the optimizations to apply when generating a tflite model.

   DEFAULT
-    Default optimization strategy.
-
-    Converter will do its best to improve size and latency based on the
-    information provided.
-    Enhanced optimizations are gained by providing a representative_dataset.
-    This is recommended, and is currently equivalent to the modes below.
-    Currently, weights will be quantized and if representative_dataset is
-    provided, activations for quantizable operations will also be quantized.
+    Default optimization strategy that quantizes model weights. Enhanced
+    optimizations are gained by providing a representative dataset that
+    quantizes biases and activations as well.
+    Converter will do its best to reduce size and latency, while minimizing
+    the loss in accuracy.

   OPTIMIZE_FOR_SIZE
     Deprecated. Does the same as DEFAULT.
@@ -110,14 +106,11 @@ class Optimize(enum.Enum):
     Deprecated. Does the same as DEFAULT.
   """

-  # Default optimization strategy.
-  #
-  # Converter will do its best to improve size and latency based on the
-  # information provided.
-  # Enhanced optimizations can be gained by providing a representative_dataset.
-  # This is recommended, and is currently equivalent to the modes below.
-  # Currently, weights will be quantized and if representative_dataset is
-  # provided, activations for quantizable operations will also be quantized.
+  # Default optimization strategy that quantizes model weights. Enhanced
+  # optimizations are gained by providing a representative dataset that
+  # quantizes biases and activations as well.
+  # Converter will do its best to reduce size and latency, while minimizing
+  # the loss in accuracy.
   DEFAULT = "DEFAULT"

   # Deprecated. Does the same as DEFAULT.
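
(Editorial note, not part of the diff: the rewritten comment above describes how `Optimize.DEFAULT` is consumed. A minimal sketch of weight-only, dynamic-range quantization; the SavedModel path is illustrative.)

```python
import tensorflow as tf

# Optimize.DEFAULT without a representative dataset quantizes weights only;
# activations remain float at runtime (dynamic-range quantization).
converter = tf.lite.TFLiteConverter.from_saved_model("./saved_model")
converter.optimizations = {tf.lite.Optimize.DEFAULT}
tflite_model = converter.convert()

with open("model_dynamic_range.tflite", "wb") as f:
  f.write(tflite_model)
```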
@@ -132,48 +125,47 @@

 @_tf_export("lite.RepresentativeDataset")
 class RepresentativeDataset(object):
-  """Representative dataset to evaluate optimizations.
+  """Representative dataset used to optimize the model.

-  A representative dataset that can be used to evaluate optimizations by the
-  converter. E.g. converter can use these examples to estimate (min, max) ranges
-  by calibrating the model on inputs. This can allow converter to quantize a
-  converted floating point model.
+  This is a generator function that provides a small dataset to calibrate or
+  estimate the range, i.e, (min, max) of all floating-point arrays in the model
+  (such as model input, activation outputs of intermediate layers, and model
+  output) for quantization. Usually, this is a small subset of a few hundred
+  samples randomly chosen, in no particular order, from the training or
+  evaluation dataset.
   """

   def __init__(self, input_gen):
     """Creates a representative dataset.

     Args:
-      input_gen: an input generator that can be used to generate input samples
-        for the model. This must be a callable object that returns an object
-        that supports the `iter()` protocol (e.g. a generator function). The
-        elements generated must have same type and shape as inputs to the model.
+      input_gen: A generator function that generates input samples for the
+        model and has the same order, type and shape as the inputs to the model.
+        Usually, this is a small subset of a few hundred samples randomly
+        chosen, in no particular order, from the training or evaluation dataset.
     """
     self.input_gen = input_gen


 @_tf_export("lite.TargetSpec")
 class TargetSpec(object):
-  """Specification of target device.
-
-  Details about target device. Converter optimizes the generated model for
-  specific device.
+  """Specification of target device used to optimize the model.

   Attributes:
-    supported_ops: Experimental flag, subject to change. Set of OpsSet options
-      supported by the device. (default set([OpsSet.TFLITE_BUILTINS]))
-    supported_types: List of types for constant values on the target device.
-      Frequently, an optimization choice is driven by the most compact
-      (i.e. smallest) type in this list (default [tf.float32])
+    supported_ops: Experimental flag, subject to change. Set of `tf.lite.OpsSet`
+      options, where each option represents a set of operators supported by the
+      target device. (default {tf.lite.OpsSet.TFLITE_BUILTINS}))
+    supported_types: Set of `tf.dtypes.DType` data types supported on the target
+      device. If initialized, optimization might be driven by the smallest type
+      in this set. (default set())
     experimental_select_user_tf_ops: Experimental flag, subject to change. Set
       of user's TensorFlow operators' names that are required in the TensorFlow
       Lite runtime. These ops will be exported as select TensorFlow ops in the
-      model (in conjunction with the OpsSet.SELECT_TF_OPS flag). This is an
-      advanced feature that should only be used if the client is using TF ops
+      model (in conjunction with the tf.lite.OpsSet.SELECT_TF_OPS flag). This is
+      an advanced feature that should only be used if the client is using TF ops
       that may not be linked in by default with the TF ops that are provided
       when using the SELECT_TF_OPS path. The client is responsible for linking
       these ops into the target runtime.
   """

   def __init__(self,
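
(Editorial note, not part of the diff: a sketch of the generator described by the rewritten `RepresentativeDataset` docstring; the input shape, sample count, and SavedModel path are illustrative.)

```python
import numpy as np
import tensorflow as tf

def representative_dataset():
  # Yield a few hundred samples with the same order, type and shape as the
  # model inputs; random data stands in for real training/evaluation samples.
  for _ in range(100):
    yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_saved_model("./saved_model")
converter.optimizations = {tf.lite.Optimize.DEFAULT}
converter.representative_dataset = tf.lite.RepresentativeDataset(representative_dataset)
tflite_model = converter.convert()  # activations are calibrated and quantized
```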
@@ -181,17 +173,17 @@ class TargetSpec(object):
                supported_types=None,
                experimental_select_user_tf_ops=None):
     if supported_ops is None:
-      supported_ops = set([OpsSet.TFLITE_BUILTINS])
+      supported_ops = {OpsSet.TFLITE_BUILTINS}
     self.supported_ops = supported_ops
     if supported_types is None:
-      supported_types = []
+      supported_types = set()
     self.supported_types = supported_types
     if experimental_select_user_tf_ops is None:
-      self.experimental_select_user_tf_ops = []
+      self.experimental_select_user_tf_ops = set()


 class QuantizationMode(object):
-  """QuantizationMode determines the quantized conversion from user options."""
+  """QuantizationMode determines the quantization type from user options."""

   def __init__(self, optimizations, target_spec, representative_dataset,
                graph_def):
@@ -205,7 +197,6 @@ class QuantizationMode(object):
   # TODO(b/162537905): Refactor the following quantization functions -
   # re-organize and refactor for better readability.
   def post_training_int8_no_float(self):
-    """Post training int8 quantize, disallow float fallback."""
     return (self._any_optimization_enabled() and
             self._is_int8_target_required() and
             not self._is_int16x8_target_required() and
@@ -213,19 +204,16 @@ class QuantizationMode(object):
             self._representative_dataset is not None)

   def post_training_int8_allow_float(self):
-    """Post training int8 quantize, allow float fallback."""
     return (self._any_optimization_enabled() and
             not self._is_int16x8_target_required() and
             self._representative_dataset is not None and
             self._smallest_supported_type() == _dtypes.int8)

   def is_post_training_integer_quantize_8(self):
-    """Post training integer 8 quantization."""
     return (self.post_training_int8_no_float() or
             self.post_training_int8_allow_float())

   def is_post_training_integer_quantize_16x8(self):
-    """Post training integer 16x8 quantization."""
     return (self.post_training_int16x8_no_float() or
             self.post_training_int16x8_allow_float())
@@ -239,7 +227,6 @@ class QuantizationMode(object):
             self.contains_training_quant_op())

   def post_training_int16x8_no_float(self):
-    """Post training int16x8 quantize, disallow float fallback."""
     return (self._any_optimization_enabled() and
             not self._is_int8_target_required() and
             self._is_int16x8_target_required() and
@@ -247,13 +234,11 @@ class QuantizationMode(object):
             self._representative_dataset is not None)

   def post_training_int16x8_allow_float(self):
-    """Post training int16x8 quantize, allow float fallback."""
     return (self._any_optimization_enabled() and
             self._is_int16x8_target_required() and
             self._is_allow_float())

   def post_training_dynamic_range_int8(self):
-    """Post training int8 const, on-the-fly int8 quantize of dynamic tensors."""
     # Post-training dynamic range quantization is only enabled if post-training
     # int8 quantization and training time quantization was not done.
     return (self._any_optimization_enabled() and
@@ -262,7 +247,6 @@ class QuantizationMode(object):
             self._smallest_supported_type() == _dtypes.int8)

   def post_training_fp16(self):
-    """Post training fp16 quantize."""
     return (self._any_optimization_enabled() and
             self._smallest_supported_type() == _dtypes.float16)
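
(Editorial note, not part of the diff: the predicates above are driven purely by user options. The configuration below is a sketch that should satisfy `post_training_int8_no_float`, i.e. an int8-only target, a representative dataset, and no float fallback; the path and the `representative_dataset` function are assumed from the earlier sketch.)

```python
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model("./saved_model")
converter.optimizations = {tf.lite.Optimize.DEFAULT}
converter.representative_dataset = representative_dataset  # defined earlier
# Restricting the target to int8 builtin kernels disallows float fallback.
converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8}
converter.inference_input_type = tf.int8   # quantized model input
converter.inference_output_type = tf.int8  # quantized model output
tflite_model = converter.convert()
```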
@@ -416,21 +400,20 @@ class TFLiteConverterBase(object):
   """Converter subclass to share functionality between V1 and V2 converters."""

   def __init__(self):
-    self.allow_custom_ops = False
-    self.target_spec = TargetSpec()
-    self.optimizations = []
+    self.optimizations = set()
     self.representative_dataset = None
+    self.target_spec = TargetSpec()
+    self.allow_custom_ops = False
     self.experimental_new_converter = True
     self._experimental_new_quantizer = False
     self._experimental_calibrate_only = False
-    # The 'GraphDebugInfo' contains the stack traces of all the original nodes
-    # in the `GraphDef` to the converter.
-    self._debug_info = None
+    self._experimental_sparsify_model = False
+    self._debug_info = None  # contains the stack traces of all the original
+    # nodes in the `GraphDef` to the converter.
     self.saved_model_dir = None
     self._saved_model_tags = None
     self._saved_model_version = 0
     self._saved_model_exported_names = []
-    self._experimental_sparsify_model = False

   def _grappler_config(self, optimizers=None):
     """Creates a tf.compat.v1.ConfigProto for configuring Grappler.
@@ -684,9 +667,9 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2):
     saved_model_dir: Directory of the SavedModel.
     saved_model_tags: Set of tags identifying the MetaGraphDef within the
       SavedModel to analyze. All tags in the tag set must be present. (default
-      set(SERVING)).
-    saved_model_exported_names: Names to be exported (default: export all)
-      when the saved model import path is on.
+      {tf.saved_model.SERVING}).
+    saved_model_exported_names: Names to be exported when the saved model
+      import path is on.
     trackable_obj: tf.AutoTrackable object associated with `funcs`. A
       reference to this object needs to be maintained so that Variables do not
       get garbage collected since functions have a weak reference to
@@ -972,20 +955,21 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
   """Converts a TensorFlow model into TensorFlow Lite model.

   Attributes:
-    allow_custom_ops: Boolean indicating whether to allow custom operations.
-      When False, any unknown operation is an error. When True, custom ops are
-      created for any op that is unknown. The developer needs to provide these
-      to the TensorFlow Lite runtime with a custom resolver. (default False)
-    optimizations: Experimental flag, subject to change. A list of optimizations
-      to apply when converting the model. E.g. `[Optimize.DEFAULT]`
-    representative_dataset: A representative dataset that can be used to
-      generate input and output samples for the model. The converter can use the
-      dataset to evaluate different optimizations. Note that this is an optional
-      attribute but it is necessary if INT8 is the only support builtin ops in
-      target ops. (default None)
+    optimizations: Experimental flag, subject to change. Set of optimizations
+      to apply. e.g {tf.lite.Optimize.DEFAULT}. (default None, must be None or a
+      set of values of type `tf.lite.Optimize`)
+    representative_dataset: A generator function used for integer quantization
+      where each generated sample has the same order, type and shape as the
+      inputs to the model. Usually, this is a small subset of a few hundred
+      samples randomly chosen, in no particular order, from the training or
+      evaluation dataset. This is an optional attribute, but required for full
+      integer quantization, i.e, if `tf.int8` is the only supported type in
+      `target_spec.supported_types`. Refer to `tf.lite.RepresentativeDataset`.
     target_spec: Experimental flag, subject to change. Specifications of target
       device, including supported ops set, supported types and a set of user's
       defined TensorFlow operators required in the TensorFlow Lite runtime.
+      Refer to `tf.lite.TargetSpec`.
     inference_input_type: Data type of the input layer. Note that integer types
       (tf.int8 and tf.uint8) are currently only supported for post training
       integer quantization and quantization aware training. (default tf.float32,
@@ -994,8 +978,13 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
       types (tf.int8 and tf.uint8) are currently only supported for post
       training integer quantization and quantization aware training. (default
       tf.float32, must be in {tf.float32, tf.int8, tf.uint8})
+    allow_custom_ops: Boolean indicating whether to allow custom operations.
+      When False, any unknown operation is an error. When True, custom ops are
+      created for any op that is unknown. The developer needs to provide these
+      to the TensorFlow Lite runtime with a custom resolver. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)

   Example usage:

   ```python
@@ -1063,7 +1052,8 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
         `signatures` attribute of the MetaGraphdef is used. (default
         saved_model.signatures)
       tags: Set of tags identifying the MetaGraphDef within the SavedModel to
-        analyze. All tags in the tag set must be present. (default set(SERVING))
+        analyze. All tags in the tag set must be present. (default
+        {tf.saved_model.SERVING} or {'serve'})

     Returns:
       TFLiteConverter object.
@@ -1209,9 +1199,13 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
     if (requires_quantized_input_stats and
         not converter_kwargs["quantized_input_stats"]):
-      raise ValueError("The `quantized_input_stats` flag must be defined when "
-                       "either `inference_type` flag or `inference_input_type` "
-                       "flag is set to tf.uint8 or tf.int8.")
+      raise ValueError(
+          "The `quantized_input_stats` flag must be defined when either "
+          "`inference_type` flag or `inference_input_type` flag is set to "
+          "tf.int8 or tf.uint8. Currently, `inference_type={}` and "
+          "`inference_input_type={}`.".format(
+              _get_tf_type_name(converter_kwargs["inference_type"]),
+              _get_tf_type_name(converter_kwargs["inference_input_type"])))

   def convert(self):
     """Converts a TensorFlow GraphDef based on instance variables.
@@ -1424,9 +1418,9 @@ class TFLiteSavedModelConverter(TFLiteConverterBaseV1):
     saved_model_dir: Directory of the SavedModel.
     saved_model_tags: Set of tags identifying the MetaGraphDef within the
       SavedModel to analyze. All tags in the tag set must be present. (default
-      set(SERVING)).
-    saved_model_exported_names: Names to be exported (default: export all)
-      when the saved model import path is on.
+      {tf.saved_model.SERVING}).
+    saved_model_exported_names: Names to be exported when the saved model
+      import path is on.
     experimental_debug_info_func: An experimental function to retrieve the
       graph debug info for a set of nodes from the `graph_def`.
@@ -1645,33 +1639,42 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
   model into either a TFLite FlatBuffer or graph visualization.

   Attributes:
-    inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8}`. If `optimzations` are provided, this
-      parameter is ignored. (default tf.float32)
-    inference_input_type: Target data type of real-number input arrays. Allows
-      for a different type for input arrays. If an integer type is provided and
-      `optimizations` are not used, `quantized_input_stats` must be provided. If
-      `inference_type` is tf.uint8, signaling conversion to a fully quantized
-      model from a quantization-aware trained input model, then
-      `inference_input_type` defaults to tf.uint8. In all other cases,
-      `inference_input_type` defaults to tf.float32. Must be `{tf.float32,
-      tf.uint8, tf.int8}` (default None)
-    inference_output_type: Target data type of real-number output arrays. Allows
-      for a different type for output arrays. If `inference_type` is tf.uint8,
-      signaling conversion to a fully quantized model from a quantization-aware
-      trained output model, then `inference_output_type` defaults to tf.uint8.
-      In all other cases, `inference_output_type` must be tf.float32, an error
-      will be thrown otherwise. Must be `{tf.float32, tf.uint8, tf.int8}`
-    output_format: Output file format. Currently must be `{TFLITE,
-      GRAPHVIZ_DOT}`. (default TFLITE)
-    quantized_input_stats: Dict of strings representing input tensor names
-      mapped to tuple of floats representing the mean and standard deviation
-      of the training data (e.g., {"foo" : (0., 1.)}). Only need if
-      `inference_input_type` is `QUANTIZED_UINT8`. real_input_value =
-      (quantized_input_value - mean_value) / std_dev_value. (default {})
-    default_ranges_stats: Tuple of integers representing (min, max) range values
-      for all arrays without a specified range. Intended for experimenting with
-      quantization via "dummy quantization". (default None)
+    optimizations: Experimental flag, subject to change. Set of optimizations to
+      apply. e.g {tf.lite.Optimize.DEFAULT}. (default None, must be None or a
+      set of values of type `tf.lite.Optimize`)
+    representative_dataset: A generator function used for integer quantization
+      where each generated sample has the same order, type and shape as the
+      inputs to the model. Usually, this is a small subset of a few hundred
+      samples randomly chosen, in no particular order, from the training or
+      evaluation dataset. This is an optional attribute, but required for full
+      integer quantization, i.e, if `tf.int8` is the only supported type in
+      `target_spec.supported_types`. Refer to `tf.lite.RepresentativeDataset`.
+    target_spec: Experimental flag, subject to change. Specifications of target
+      device, including supported ops set, supported types and a set of user's
+      defined TensorFlow operators required in the TensorFlow Lite runtime.
+      Refer to `tf.lite.TargetSpec`.
+    inference_type: Data type of numeric arrays, excluding the input layer.
+      (default tf.float32, must be in {tf.float32, tf.int8, tf.uint8})
+    inference_input_type: Data type of the numeric arrays in the input layer. If
+      `inference_input_type` is in {tf.int8, tf.uint8}, then
+      `quantized_input_stats` must be provided. (default is the value assigned
+      to `inference_type`, must be in {tf.float32, tf.int8, tf.uint8})
+    inference_output_type: Data type of the numeric arrays in the output layer.
+      (default is the value assigned to `inference_type`, must be in
+      {tf.float32, tf.int8, tf.uint8})
+    quantized_input_stats: Map of input tensor names to a tuple of floats
+      representing the mean and standard deviation of the training data.
+      (e.g., {"foo" : (0., 1.)}). Required if `inference_input_type` is tf.int8
+      or tf.uint8. (default None)
+    default_ranges_stats: Tuple of integers (min, max) representing range values
+      for all numeric arrays without a specified range. Intended for
+      experimenting with quantization via "dummy quantization". (default None)
+    allow_custom_ops: Boolean indicating whether to allow custom operations.
+      When False any unknown operation is an error. When True, custom ops are
+      created for any op that is unknown. The developer will need to provide
+      these to the TensorFlow Lite runtime with a custom resolver. (default
+      False)
     drop_control_dependency: Boolean indicating whether to drop control
       dependencies silently. This is due to TFLite not supporting control
       dependencies. (default True)
@@ -1683,37 +1686,25 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
     change_concat_input_ranges: Boolean to change behavior of min/max ranges for
       inputs and outputs of the concat operator for quantized models. Changes
      the ranges of concat operator overlap when true. (default False)
-    allow_custom_ops: Boolean indicating whether to allow custom operations.
-      When false any unknown operation is an error. When true, custom ops are
-      created for any op that is unknown. The developer will need to provide
-      these to the TensorFlow Lite runtime with a custom resolver. (default
-      False)
-    post_training_quantize: Deprecated. Please specify `[Optimize.DEFAULT]` for
-      `optimizations` instead. Boolean indicating whether to quantize the
-      weights of the converted float model. Model size will be reduced and
-      there will be latency improvements (at the cost of accuracy). (default
-      False)
+    output_format: Output file format. (default
+      tf.compat.v1.lite.constants.TFLITE, must be in
+      {tf.compat.v1.lite.constants.TFLITE,
+      tf.compat.v1.lite.constants.GRAPHVIZ_DOT})
     dump_graphviz_dir: Full filepath of folder to dump the graphs at various
       stages of processing GraphViz .dot files. Preferred over
-      --output_format=GRAPHVIZ_DOT in order to keep the requirements of the
-      output file. (default None)
-    dump_graphviz_video: Boolean indicating whether to dump the graph after
-      every graph transformation. (default False)
-    conversion_summary_dir: A string indicating the path to the generated
-      conversion logs.
-    target_ops: Deprecated. Please specify `target_spec.supported_ops` instead.
-      Set of OpsSet options indicating which converter to use. (default
-      set([OpsSet.TFLITE_BUILTINS]))
-    target_spec: Experimental flag, subject to change. Specifications of target
-      device, including supported ops set, supported types and a set of user's
-      defined TensorFlow operators required in the TensorFlow Lite runtime.
-    optimizations: Experimental flag, subject to change. A list of optimizations
-      to apply when converting the model. E.g. `[Optimize.DEFAULT]`
-    representative_dataset: A representative dataset that can be used to
-      generate input and output samples for the model. The converter can use the
-      dataset to evaluate different optimizations.
+      `output_format=tf.compat.v1.lite.constants.GRAPHVIZ_DOT` in order to keep
+      the requirements of the output file. (default None)
+    dump_graphviz_video: Boolean indicating whether to dump the GraphViz .dot
+      files after every graph transformation. Requires the `dump_graphviz_dir`
+      flag to be specified. (default False)
+    conversion_summary_dir: Full path of the directory to store conversion logs.
+      (default None)
+    target_ops: Deprecated. Please use `target_spec.supported_ops` instead.
+    post_training_quantize: Deprecated. Please use `optimizations` instead and
+      set it to `{tf.lite.Optimize.DEFAULT}`. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)

   Example usage:

   ```python
@@ -1911,9 +1902,10 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
       output_arrays: List of output tensors to freeze graph with. Uses output
         arrays from SignatureDef when none are provided. (default None)
       tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
-        analyze. All tags in the tag set must be present. (default set("serve"))
+        analyze. All tags in the tag set must be present. (default
+        {tf.saved_model.SERVING})
       signature_key: Key identifying SignatureDef containing inputs and outputs.
-        (default DEFAULT_SERVING_SIGNATURE_DEF_KEY)
+        (default tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY)

     Returns:
       TFLiteConverter class.
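
(Editorial note, not part of the diff: the defaults the updated docstring now spells out can be passed explicitly; a minimal sketch with an illustrative SavedModel path.)

```python
import tensorflow as tf

converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(
    "./saved_model",
    tag_set={tf.saved_model.SERVING},  # documented default
    signature_key=tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
tflite_model = converter.convert()
```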

View File

@@ -1239,18 +1239,22 @@ class FromSessionTest(TestModels, parameterized.TestCase):
       quantized_converter.inference_type = quantized_type
       quantized_converter.convert()
     self.assertEqual(
-        'The `quantized_input_stats` flag must be defined when '
-        'either `inference_type` flag or `inference_input_type` '
-        'flag is set to tf.uint8 or tf.int8.', str(error.exception))
+        'The `quantized_input_stats` flag must be defined when either '
+        '`inference_type` flag or `inference_input_type` flag is set to '
+        'tf.int8 or tf.uint8. Currently, `inference_type=tf.{}` and '
+        '`inference_input_type=None`.'.format(quantized_type.name),
+        str(error.exception))

     with self.assertRaises(ValueError) as error:
       quantized_converter.inference_type = dtypes.float32
       quantized_converter.inference_input_type = quantized_type
       quantized_converter.convert()
     self.assertEqual(
-        'The `quantized_input_stats` flag must be defined when '
-        'either `inference_type` flag or `inference_input_type` '
-        'flag is set to tf.uint8 or tf.int8.', str(error.exception))
+        'The `quantized_input_stats` flag must be defined when either '
+        '`inference_type` flag or `inference_input_type` flag is set to '
+        'tf.int8 or tf.uint8. Currently, `inference_type=tf.float32` and '
+        '`inference_input_type=tf.{}`.'.format(quantized_type.name),
+        str(error.exception))

     quantized_converter.inference_type = quantized_type
     quantized_converter.inference_input_type = quantized_type

View File

@@ -127,9 +127,9 @@ def _convert_tflite_enum_type_to_tf_type(tflite_enum_type):
   return tf_type


-def _get_tf_type_name(tf_type):
+def get_tf_type_name(tf_type):
   """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32")."""
-  return "tf." + tf_type.name
+  return "tf." + tf_type.name if tf_type else None


 def get_tensor_name(tensor):
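
(Editorial note, not part of the diff: `get_tf_type_name` lives in the internal module `tensorflow.lite.python.util`; the sketch below only illustrates the new None guard. Note the conditional expression binds looser than `+`, so the changed line parses as `("tf." + tf_type.name) if tf_type else None`.)

```python
import tensorflow as tf
from tensorflow.lite.python import util  # internal module, shown for illustration

print(util.get_tf_type_name(tf.float32))  # "tf.float32"
print(util.get_tf_type_name(None))        # None (the old helper raised AttributeError)
```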
@@ -674,7 +674,7 @@ def _modify_model_input_type(model, inference_input_type=dtypes.float32):
       raise ValueError(
           "Initial model input type must be tf.float32. Expected type for "
           "tensor with name '{}' is tf.float32, instead type is {}".format(
-              float_tensor.name, _get_tf_type_name(float_type)))
+              float_tensor.name, get_tf_type_name(float_type)))
     # If found, validate that the operator output is quantized and compatible
     # with the final model input type
     quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
@@ -683,17 +683,17 @@ def _modify_model_input_type(model, inference_input_type=dtypes.float32):
             "Initial model input is not quantized. Expected type for "
             "tensor with name '{}' should be in {}, instead type is {}".format(
                 quant_tensor.name,
-                tuple(_get_tf_type_name(t) for t in
+                tuple(get_tf_type_name(t) for t in
                       _MAP_QUANT_TO_IO_TYPES.keys()),
-                _get_tf_type_name(quant_type)))
+                get_tf_type_name(quant_type)))
       else:
         inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
         if inference_input_type not in inference_io_types:
           raise ValueError(
               "Unsupported `inference_input_type` value. Expected to be in "
               "{}, instead got {}.".format(
-                  tuple(_get_tf_type_name(t) for t in inference_io_types),
-                  _get_tf_type_name(inference_input_type)))
+                  tuple(get_tf_type_name(t) for t in inference_io_types),
+                  get_tf_type_name(inference_input_type)))
       input_quant_ops.append(op)

   if len(subgraph.inputs) != len(input_quant_ops):
@@ -725,7 +725,7 @@ def _modify_model_input_type(model, inference_input_type=dtypes.float32):
   else:
     raise ValueError(
         "Unsupported `inference_input_type` value {}.".format(
-            _get_tf_type_name(inference_input_type)))
+            get_tf_type_name(inference_input_type)))


 def _modify_model_output_type(model, inference_output_type=dtypes.float32):
@@ -768,7 +768,7 @@ def _modify_model_output_type(model, inference_output_type=dtypes.float32):
       raise ValueError(
           "Initial model output type must be tf.float32. Expected type for "
           "tensor with name '{}' is tf.float32, instead type is {}".format(
-              float_tensor.name, _get_tf_type_name(float_type)))
+              float_tensor.name, get_tf_type_name(float_type)))
     # If found, validate that the operator input is quantized and compatible
     # with the final model output type
     quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
@@ -777,17 +777,17 @@ def _modify_model_output_type(model, inference_output_type=dtypes.float32):
             "Initial model output is not dequantized. Expected type for "
             "tensor with name '{}' should be in {}, instead type is {}".format(
                 quant_tensor.name,
-                tuple(_get_tf_type_name(t) for t in
+                tuple(get_tf_type_name(t) for t in
                       _MAP_QUANT_TO_IO_TYPES.keys()),
-                _get_tf_type_name(quant_type)))
+                get_tf_type_name(quant_type)))
       else:
         inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
         if inference_output_type not in inference_io_types:
           raise ValueError(
               "Unsupported `inference_output_type` value. Expected to be in "
               "{}, instead got {}.".format(
-                  tuple(_get_tf_type_name(t) for t in inference_io_types),
-                  _get_tf_type_name(inference_output_type)))
+                  tuple(get_tf_type_name(t) for t in inference_io_types),
+                  get_tf_type_name(inference_output_type)))
       output_dequant_ops.append(op)

   if len(subgraph.outputs) != len(output_dequant_ops):
@@ -834,7 +834,7 @@ def _modify_model_output_type(model, inference_output_type=dtypes.float32):
   else:
     raise ValueError(
         "Unsupported `inference_output_type` value {}.".format(
-            _get_tf_type_name(inference_output_type)))
+            get_tf_type_name(inference_output_type)))


 def modify_model_io_type(