Update descriptions and error messages in the TFLite Converter code
PiperOrigin-RevId: 350175211
Change-Id: Ib5c66c37a307da74a3fa96a7cab8f0f3d23e7138
This commit is contained in: parent 390e2deb75, commit 2c687ec4d9
@@ -110,7 +110,7 @@ class OpsSet(enum.Enum):
       "EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8"

   def __str__(self):
-    return self.value
+    return str(self.value)

   @staticmethod
   def get_options():
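For reference, a small sketch of the behavior this `__str__` touches: `OpsSet` members stringify to their underlying string value (assumes an installed TensorFlow; not part of the commit itself):

```python
import tensorflow as tf

# OpsSet members stringify to their underlying value.
print(str(tf.lite.OpsSet.TFLITE_BUILTINS))  # TFLITE_BUILTINS
print(str(tf.lite.OpsSet.SELECT_TF_OPS))    # SELECT_TF_OPS
```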
@@ -394,22 +394,22 @@ def build_toco_convert_protos(input_tensors,
     input_tensors: List of input tensors. Type and shape are computed using
       `foo.shape` and `foo.dtype`.
     output_tensors: List of output tensors (only .name is used from this).
-    inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8, tf.int8}`. (default tf.float32)
-    inference_input_type: Target data type of real-number input arrays. Allows
-      for a different type for input arrays in the case of quantization. Must be
-      `{tf.float32, tf.uint8, tf.int8}`. (default `inference_type`)
-    input_format: Type of data to read Currently must be
-      `{TENSORFLOW_GRAPHDEF}`. (default TENSORFLOW_GRAPHDEF)
-    input_shapes: Input array shape. It needs to be a list of the same length as
-      `input_tensors`, or None. (default None)
-    output_format: Output file format. Currently must be `{TFLITE,
-      GRAPHVIZ_DOT}`. (default TFLITE)
-    quantized_input_stats: List of tuples of floats representing the mean and
-      standard deviation. Each tuple maps to the corresponding input tensor.
-      Only need if `inference_input_type` is `QUANTIZED_UINT8` or `INT8`.
-      real_input_value = (quantized_input_value - mean_value) / std_dev_value.
-      (default None)
+    inference_type: Data type of numeric arrays, excluding the input layer.
+      (default tf.float32, must be in {tf.float32, tf.int8, tf.uint8})
+    inference_input_type: Data type of the numeric arrays in the input layer. If
+      `inference_input_type` is in {tf.int8, tf.uint8}, then
+      `quantized_input_stats` must be provided. (default is the value assigned
+      to `inference_type`, must be in {tf.float32, tf.int8, tf.uint8})
+    input_format: Type of data to read.
+      (default TENSORFLOW_GRAPHDEF, must be in {TENSORFLOW_GRAPHDEF})
+    input_shapes: Input array shape. (default None, must be None or a list of
+      the same length as `input_tensors`.)
+    output_format: Output file format. (default TFLITE, must be in
+      {TFLITE, GRAPHVIZ_DOT})
+    quantized_input_stats: Map of input tensor names to a tuple of floats
+      representing the mean and standard deviation of the training data.
+      (e.g., {"foo" : (0., 1.)}). Required if `inference_input_type` is tf.int8
+      or tf.uint8. (default None)
     default_ranges_stats: Tuple of integers representing (min, max) range values
       for all arrays without a specified range. Intended for experimenting with
       quantization via "dummy quantization". (default None)
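Both the old and new wording rely on the relation real_input_value = (quantized_input_value - mean_value) / std_dev_value. A minimal sketch of that arithmetic with the new dict-style `quantized_input_stats` (the tensor name "input" and the values are hypothetical, not taken from the diff):

```python
# Map each input tensor name to (mean, std_dev) of the training data.
quantized_input_stats = {"input": (127.5, 127.5)}

mean_value, std_dev_value = quantized_input_stats["input"]
quantized_input_value = 200  # e.g. a uint8 pixel value
real_input_value = (quantized_input_value - mean_value) / std_dev_value
print(real_input_value)  # ~0.569
```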
@@ -574,8 +574,10 @@ def toco_convert_graph_def(input_data, input_arrays_with_shape, output_arrays,
     if _requires_input_stats(toco_flags):
       if (("quantized_input_stats" not in kwargs) or
           (not kwargs["quantized_input_stats"])):
-        raise ValueError("std_dev and mean must be defined when inference_type "
-                         "or inference_input_type is QUANTIZED_UINT8 or INT8.")
+        raise ValueError(
+            "The `quantized_input_stats` flag must be defined when either "
+            "`inference_type` flag or `inference_input_type` flag is set to "
+            "tf.int8 or tf.uint8.")
       input_array.mean_value, input_array.std_value = kwargs[
           "quantized_input_stats"][idx]
       input_array.name = name
@@ -661,7 +663,7 @@ def toco_convert(input_data, input_tensors, output_tensors, *args, **kwargs):
   Typically this function is used to convert from TensorFlow GraphDef to TFLite.
   Conversion can be customized by providing arguments that are forwarded to
   `build_toco_convert_protos` (see documentation for details). This function has
-  been deprecated. Please use `lite.TFLiteConverter` instead.
+  been deprecated. Please use `tf.lite.TFLiteConverter` instead.

   Args:
     input_data: Input data (i.e. often `sess.graph_def`),
@@ -137,7 +137,7 @@ class ConvertTest(test_util.TensorFlowTestCase):
     self.assertEqual("output", output_details[0]["name"])
     self.assertEqual(np.uint8, output_details[0]["dtype"])
     self.assertTrue(([1, 16, 16, 3] == output_details[0]["shape"]).all())
-    self.assertTrue(output_details[0]["quantization"][0] > 0)  # scale
+    self.assertGreater(output_details[0]["quantization"][0], 0)  # scale

  def testGraphDefQuantizationInvalid(self):
    with ops.Graph().as_default():
@@ -159,9 +159,9 @@ class ConvertTest(test_util.TensorFlowTestCase):
           enable_mlir_converter=False,
           inference_type=dtypes.uint8)
     self.assertEqual(
-        "std_dev and mean must be defined when inference_type or "
-        "inference_input_type is QUANTIZED_UINT8 or INT8.",
-        str(error.exception))
+        "The `quantized_input_stats` flag must be defined when either "
+        "`inference_type` flag or `inference_input_type` flag is set to "
+        "tf.int8 or tf.uint8.", str(error.exception))


class ConvertTestOpHint(test_util.TensorFlowTestCase):
@@ -61,6 +61,7 @@ from tensorflow.lite.python.util import get_debug_info as _get_debug_info
 from tensorflow.lite.python.util import get_grappler_config as _get_grappler_config
 from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name
 from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names
+from tensorflow.lite.python.util import get_tf_type_name as _get_tf_type_name
 from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph
 from tensorflow.lite.python.util import model_input_signature as _model_input_signature
 from tensorflow.lite.python.util import modify_model_io_type as _modify_model_io_type
@@ -89,19 +90,14 @@ from tensorflow.python.util.tf_export import tf_export as _tf_export

 @_tf_export("lite.Optimize")
 class Optimize(enum.Enum):
-  """Enum defining the optimizations to apply when generating tflite graphs.
-
-  Some optimizations may come at the cost of accuracy.
+  """Enum defining the optimizations to apply when generating a tflite model.

   DEFAULT
-    Default optimization strategy.
-
-    Converter will do its best to improve size and latency based on the
-    information provided.
-    Enhanced optimizations are gained by providing a representative_dataset.
-    This is recommended, and is currently equivalent to the modes below.
-    Currently, weights will be quantized and if representative_dataset is
-    provided, activations for quantizable operations will also be quantized.
+    Default optimization strategy that quantizes model weights. Enhanced
+    optimizations are gained by providing a representative dataset that
+    quantizes biases and activations as well.
+    Converter will do its best to reduce size and latency, while minimizing
+    the loss in accuracy.

   OPTIMIZE_FOR_SIZE
     Deprecated. Does the same as DEFAULT.
@@ -110,14 +106,11 @@ class Optimize(enum.Enum):
     Deprecated. Does the same as DEFAULT.
   """

-  # Default optimization strategy.
-  #
-  # Converter will do its best to improve size and latency based on the
-  # information provided.
-  # Enhanced optimizations can be gained by providing a representative_dataset.
-  # This is recommended, and is currently equivalent to the modes below.
-  # Currently, weights will be quantized and if representative_dataset is
-  # provided, activations for quantizable operations will also be quantized.
+  # Default optimization strategy that quantizes model weights. Enhanced
+  # optimizations are gained by providing a representative dataset that
+  # quantizes biases and activations as well.
+  # Converter will do its best to reduce size and latency, while minimizing
+  # the loss in accuracy.
   DEFAULT = "DEFAULT"

   # Deprecated. Does the same as DEFAULT.
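As context for the DEFAULT strategy described in the updated docstring, a minimal sketch of weight-only (dynamic range) quantization; the SavedModel path is hypothetical and not taken from the diff:

```python
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/my_saved_model")
# DEFAULT quantizes model weights; without a representative dataset,
# activations are left in float (dynamic range quantization).
converter.optimizations = {tf.lite.Optimize.DEFAULT}
tflite_model = converter.convert()
with open("/tmp/model_dynamic_range.tflite", "wb") as f:
  f.write(tflite_model)
```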
@@ -132,48 +125,47 @@ class Optimize(enum.Enum):

 @_tf_export("lite.RepresentativeDataset")
 class RepresentativeDataset(object):
-  """Representative dataset to evaluate optimizations.
+  """Representative dataset used to optimize the model.

-  A representative dataset that can be used to evaluate optimizations by the
-  converter. E.g. converter can use these examples to estimate (min, max) ranges
-  by calibrating the model on inputs. This can allow converter to quantize a
-  converted floating point model.
+  This is a generator function that provides a small dataset to calibrate or
+  estimate the range, i.e, (min, max) of all floating-point arrays in the model
+  (such as model input, activation outputs of intermediate layers, and model
+  output) for quantization. Usually, this is a small subset of a few hundred
+  samples randomly chosen, in no particular order, from the training or
+  evaluation dataset.
   """

   def __init__(self, input_gen):
     """Creates a representative dataset.

     Args:
-      input_gen: an input generator that can be used to generate input samples
-        for the model. This must be a callable object that returns an object
-        that supports the `iter()` protocol (e.g. a generator function). The
-        elements generated must have same type and shape as inputs to the model.
+      input_gen: A generator function that generates input samples for the
+        model and has the same order, type and shape as the inputs to the model.
+        Usually, this is a small subset of a few hundred samples randomly
+        chosen, in no particular order, from the training or evaluation dataset.
     """
     self.input_gen = input_gen


 @_tf_export("lite.TargetSpec")
 class TargetSpec(object):
-  """Specification of target device.
-
-  Details about target device. Converter optimizes the generated model for
-  specific device.
+  """Specification of target device used to optimize the model.

   Attributes:
-    supported_ops: Experimental flag, subject to change. Set of OpsSet options
-      supported by the device. (default set([OpsSet.TFLITE_BUILTINS]))
-    supported_types: List of types for constant values on the target device.
-      Frequently, an optimization choice is driven by the most compact
-      (i.e. smallest) type in this list (default [tf.float32])
+    supported_ops: Experimental flag, subject to change. Set of `tf.lite.OpsSet`
+      options, where each option represents a set of operators supported by the
+      target device. (default {tf.lite.OpsSet.TFLITE_BUILTINS}))
+    supported_types: Set of `tf.dtypes.DType` data types supported on the target
+      device. If initialized, optimization might be driven by the smallest type
+      in this set. (default set())
     experimental_select_user_tf_ops: Experimental flag, subject to change. Set
       of user's TensorFlow operators' names that are required in the TensorFlow
       Lite runtime. These ops will be exported as select TensorFlow ops in the
-      model (in conjunction with the OpsSet.SELECT_TF_OPS flag). This is an
-      advanced feature that should only be used if the client is using TF ops
+      model (in conjunction with the tf.lite.OpsSet.SELECT_TF_OPS flag). This is
+      an advanced feature that should only be used if the client is using TF ops
      that may not be linked in by default with the TF ops that are provided
      when using the SELECT_TF_OPS path. The client is responsible for linking
      these ops into the target runtime.
-
   """

   def __init__(self,
@@ -181,17 +173,17 @@ class TargetSpec(object):
                supported_types=None,
                experimental_select_user_tf_ops=None):
     if supported_ops is None:
-      supported_ops = set([OpsSet.TFLITE_BUILTINS])
+      supported_ops = {OpsSet.TFLITE_BUILTINS}
     self.supported_ops = supported_ops
     if supported_types is None:
-      supported_types = []
+      supported_types = set()
     self.supported_types = supported_types
     if experimental_select_user_tf_ops is None:
-      self.experimental_select_user_tf_ops = []
+      self.experimental_select_user_tf_ops = set()


 class QuantizationMode(object):
-  """QuantizationMode determines the quantized conversion from user options."""
+  """QuantizationMode determines the quantization type from user options."""

   def __init__(self, optimizations, target_spec, representative_dataset,
                graph_def):
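A minimal sketch of the calibration workflow the RepresentativeDataset and TargetSpec docstrings describe; the input shape, sample count, and SavedModel path below are hypothetical:

```python
import numpy as np
import tensorflow as tf

def representative_dataset():
  # A few hundred samples with the same order, type and shape as the model
  # inputs; random data here only to keep the sketch self-contained.
  for _ in range(100):
    yield [np.random.rand(1, 224, 224, 3).astype(np.float32)]

converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/my_saved_model")
converter.optimizations = {tf.lite.Optimize.DEFAULT}
converter.representative_dataset = representative_dataset
# Restricting the target to built-in int8 ops disallows float fallback.
converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8}
tflite_model = converter.convert()
```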
@@ -205,7 +197,6 @@ class QuantizationMode(object):
   # TODO(b/162537905): Refactor the following quantization functions -
   # re-organize and refactor for better readability.
   def post_training_int8_no_float(self):
-    """Post training int8 quantize, disallow float fallback."""
     return (self._any_optimization_enabled() and
             self._is_int8_target_required() and
             not self._is_int16x8_target_required() and
@@ -213,19 +204,16 @@ class QuantizationMode(object):
             self._representative_dataset is not None)

   def post_training_int8_allow_float(self):
-    """Post training int8 quantize, allow float fallback."""
     return (self._any_optimization_enabled() and
             not self._is_int16x8_target_required() and
             self._representative_dataset is not None and
             self._smallest_supported_type() == _dtypes.int8)

   def is_post_training_integer_quantize_8(self):
-    """Post training integer 8 quantization."""
     return (self.post_training_int8_no_float() or
             self.post_training_int8_allow_float())

   def is_post_training_integer_quantize_16x8(self):
-    """Post training integer 16x8 quantization."""
     return (self.post_training_int16x8_no_float() or
             self.post_training_int16x8_allow_float())

@@ -239,7 +227,6 @@ class QuantizationMode(object):
           self.contains_training_quant_op())

   def post_training_int16x8_no_float(self):
-    """Post training int16x8 quantize, disallow float fallback."""
     return (self._any_optimization_enabled() and
             not self._is_int8_target_required() and
             self._is_int16x8_target_required() and
@@ -247,13 +234,11 @@ class QuantizationMode(object):
             self._representative_dataset is not None)

   def post_training_int16x8_allow_float(self):
-    """Post training int16x8 quantize, allow float fallback."""
     return (self._any_optimization_enabled() and
             self._is_int16x8_target_required() and
             self._is_allow_float())

   def post_training_dynamic_range_int8(self):
-    """Post training int8 const, on-the-fly int8 quantize of dynamic tensors."""
     # Post-training dynamic range quantization is only enabled if post-training
     # int8 quantization and training time quantization was not done.
     return (self._any_optimization_enabled() and
@@ -262,7 +247,6 @@ class QuantizationMode(object):
             self._smallest_supported_type() == _dtypes.int8)

   def post_training_fp16(self):
-    """Post training fp16 quantize."""
     return (self._any_optimization_enabled() and
             self._smallest_supported_type() == _dtypes.float16)

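post_training_fp16 above is selected when float16 is the smallest supported type. A sketch of requesting that path through the public API (SavedModel path hypothetical):

```python
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/my_saved_model")
converter.optimizations = {tf.lite.Optimize.DEFAULT}
# Making float16 the smallest supported type selects float16 quantization.
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()
```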
@@ -416,21 +400,20 @@ class TFLiteConverterBase(object):
   """Converter subclass to share functionality between V1 and V2 converters."""

   def __init__(self):
-    self.allow_custom_ops = False
-    self.target_spec = TargetSpec()
-    self.optimizations = []
+    self.optimizations = set()
     self.representative_dataset = None
+    self.target_spec = TargetSpec()
+    self.allow_custom_ops = False
     self.experimental_new_converter = True
     self._experimental_new_quantizer = False
     self._experimental_calibrate_only = False
-    # The 'GraphDebugInfo' contains the stack traces of all the original nodes
-    # in the `GraphDef` to the converter.
-    self._debug_info = None
+    self._experimental_sparsify_model = False
+    self._debug_info = None  # contains the stack traces of all the original
+    # nodes in the `GraphDef` to the converter.
     self.saved_model_dir = None
     self._saved_model_tags = None
     self._saved_model_version = 0
     self._saved_model_exported_names = []
-    self._experimental_sparsify_model = False

   def _grappler_config(self, optimizers=None):
     """Creates a tf.compat.v1.ConfigProto for configuring Grappler.
@@ -684,9 +667,9 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2):
     saved_model_dir: Directory of the SavedModel.
     saved_model_tags: Set of tags identifying the MetaGraphDef within the
       SavedModel to analyze. All tags in the tag set must be present. (default
-      set(SERVING)).
-    saved_model_exported_names: Names to be exported (default: export all)
-      when the saved model import path is on.
+      {tf.saved_model.SERVING}).
+    saved_model_exported_names: Names to be exported when the saved model
+      import path is on.
     trackable_obj: tf.AutoTrackable object associated with `funcs`. A
       reference to this object needs to be maintained so that Variables do not
       get garbage collected since functions have a weak reference to
@@ -972,20 +955,21 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
   """Converts a TensorFlow model into TensorFlow Lite model.

   Attributes:
-    allow_custom_ops: Boolean indicating whether to allow custom operations.
-      When False, any unknown operation is an error. When True, custom ops are
-      created for any op that is unknown. The developer needs to provide these
-      to the TensorFlow Lite runtime with a custom resolver. (default False)
-    optimizations: Experimental flag, subject to change. A list of optimizations
-      to apply when converting the model. E.g. `[Optimize.DEFAULT]`
-    representative_dataset: A representative dataset that can be used to
-      generate input and output samples for the model. The converter can use the
-      dataset to evaluate different optimizations. Note that this is an optional
-      attribute but it is necessary if INT8 is the only support builtin ops in
-      target ops.
+    optimizations: Experimental flag, subject to change. Set of optimizations
+      to apply. e.g {tf.lite.Optimize.DEFAULT}. (default None, must be None or a
+      set of values of type `tf.lite.Optimize`)
+    representative_dataset: A generator function used for integer quantization
+      where each generated sample has the same order, type and shape as the
+      inputs to the model. Usually, this is a small subset of a few hundred
+      samples randomly chosen, in no particular order, from the training or
+      evaluation dataset. This is an optional attribute, but required for full
+      integer quantization, i.e, if `tf.int8` is the only supported type in
+      `target_spec.supported_types`. Refer to `tf.lite.RepresentativeDataset`.
+      (default None)
     target_spec: Experimental flag, subject to change. Specifications of target
       device, including supported ops set, supported types and a set of user's
       defined TensorFlow operators required in the TensorFlow Lite runtime.
+      Refer to `tf.lite.TargetSpec`.
     inference_input_type: Data type of the input layer. Note that integer types
       (tf.int8 and tf.uint8) are currently only supported for post training
       integer quantization and quantization aware training. (default tf.float32,
@@ -994,8 +978,13 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
       types (tf.int8 and tf.uint8) are currently only supported for post
       training integer quantization and quantization aware training. (default
       tf.float32, must be in {tf.float32, tf.int8, tf.uint8})
+    allow_custom_ops: Boolean indicating whether to allow custom operations.
+      When False, any unknown operation is an error. When True, custom ops are
+      created for any op that is unknown. The developer needs to provide these
+      to the TensorFlow Lite runtime with a custom resolver. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)

   Example usage:

   ```python
@@ -1063,7 +1052,8 @@ class TFLiteConverterV2(TFLiteFrozenGraphConverterV2):
         `signatures` attribute of the MetaGraphdef is used. (default
         saved_model.signatures)
       tags: Set of tags identifying the MetaGraphDef within the SavedModel to
-        analyze. All tags in the tag set must be present. (default set(SERVING))
+        analyze. All tags in the tag set must be present. (default
+        {tf.saved_model.SERVING} or {'serve'})

     Returns:
       TFLiteConverter object.
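A sketch of the from_saved_model defaults the updated docstring spells out; the directory is hypothetical, and the tag and signature key shown are the documented defaults:

```python
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_saved_model(
    "/tmp/my_saved_model",
    signature_keys=["serving_default"],
    tags={tf.saved_model.SERVING})
tflite_model = converter.convert()
```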
@@ -1209,9 +1199,13 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):

     if (requires_quantized_input_stats and
         not converter_kwargs["quantized_input_stats"]):
-      raise ValueError("The `quantized_input_stats` flag must be defined when "
-                       "either `inference_type` flag or `inference_input_type` "
-                       "flag is set to tf.uint8 or tf.int8.")
+      raise ValueError(
+          "The `quantized_input_stats` flag must be defined when either "
+          "`inference_type` flag or `inference_input_type` flag is set to "
+          "tf.int8 or tf.uint8. Currently, `inference_type={}` and "
+          "`inference_input_type={}`.".format(
+              _get_tf_type_name(converter_kwargs["inference_type"]),
+              _get_tf_type_name(converter_kwargs["inference_input_type"])))

   def convert(self):
     """Converts a TensorFlow GraphDef based on instance variables.
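A sketch of how a caller satisfies the check above with the 1.x converter by supplying stats whenever an integer inference type is set (the file path, array names, and values are hypothetical):

```python
import tensorflow as tf

converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    "/tmp/frozen_graph.pb", input_arrays=["input"], output_arrays=["output"])
converter.inference_type = tf.uint8
# Required whenever inference_type or inference_input_type is tf.int8/tf.uint8:
# each input array name maps to (mean, std_dev) of the training data.
converter.quantized_input_stats = {"input": (127.5, 127.5)}
# default_ranges_stats may also be needed if the graph lacks min/max ranges.
tflite_model = converter.convert()
```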
@@ -1424,9 +1418,9 @@ class TFLiteSavedModelConverter(TFLiteConverterBaseV1):
     saved_model_dir: Directory of the SavedModel.
     saved_model_tags: Set of tags identifying the MetaGraphDef within the
       SavedModel to analyze. All tags in the tag set must be present. (default
-      set(SERVING)).
-    saved_model_exported_names: Names to be exported (default: export all)
-      when the saved model import path is on.
+      {tf.saved_model.SERVING}).
+    saved_model_exported_names: Names to be exported when the saved model
+      import path is on.
     experimental_debug_info_func: An experimental function to retrieve the
       graph debug info for a set of nodes from the `graph_def`.

@@ -1645,33 +1639,42 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
   model into either a TFLite FlatBuffer or graph visualization.

   Attributes:
-    inference_type: Target data type of real-number arrays in the output file.
-      Must be `{tf.float32, tf.uint8}`. If `optimzations` are provided, this
-      parameter is ignored. (default tf.float32)
-    inference_input_type: Target data type of real-number input arrays. Allows
-      for a different type for input arrays. If an integer type is provided and
-      `optimizations` are not used, `quantized_input_stats` must be provided. If
-      `inference_type` is tf.uint8, signaling conversion to a fully quantized
-      model from a quantization-aware trained input model, then
-      `inference_input_type` defaults to tf.uint8. In all other cases,
-      `inference_input_type` defaults to tf.float32. Must be `{tf.float32,
-      tf.uint8, tf.int8}`
-    inference_output_type: Target data type of real-number output arrays. Allows
-      for a different type for output arrays. If `inference_type` is tf.uint8,
-      signaling conversion to a fully quantized model from a quantization-aware
-      trained output model, then `inference_output_type` defaults to tf.uint8.
-      In all other cases, `inference_output_type` must be tf.float32, an error
-      will be thrown otherwise. Must be `{tf.float32, tf.uint8, tf.int8}`
-    output_format: Output file format. Currently must be `{TFLITE,
-      GRAPHVIZ_DOT}`. (default TFLITE)
-    quantized_input_stats: Dict of strings representing input tensor names
-      mapped to tuple of floats representing the mean and standard deviation
-      of the training data (e.g., {"foo" : (0., 1.)}). Only need if
-      `inference_input_type` is `QUANTIZED_UINT8`. real_input_value =
-      (quantized_input_value - mean_value) / std_dev_value. (default {})
-    default_ranges_stats: Tuple of integers representing (min, max) range values
-      for all arrays without a specified range. Intended for experimenting with
-      quantization via "dummy quantization". (default None)
+    optimizations: Experimental flag, subject to change. Set of optimizations to
+      apply. e.g {tf.lite.Optimize.DEFAULT}. (default None, must be None or a
+      set of values of type `tf.lite.Optimize`)
+    representative_dataset: A generator function used for integer quantization
+      where each generated sample has the same order, type and shape as the
+      inputs to the model. Usually, this is a small subset of a few hundred
+      samples randomly chosen, in no particular order, from the training or
+      evaluation dataset. This is an optional attribute, but required for full
+      integer quantization, i.e, if `tf.int8` is the only supported type in
+      `target_spec.supported_types`. Refer to `tf.lite.RepresentativeDataset`.
+      (default None)
+    target_spec: Experimental flag, subject to change. Specifications of target
+      device, including supported ops set, supported types and a set of user's
+      defined TensorFlow operators required in the TensorFlow Lite runtime.
+      Refer to `tf.lite.TargetSpec`.
+    inference_type: Data type of numeric arrays, excluding the input layer.
+      (default tf.float32, must be in {tf.float32, tf.int8, tf.uint8})
+    inference_input_type: Data type of the numeric arrays in the input layer. If
+      `inference_input_type` is in {tf.int8, tf.uint8}, then
+      `quantized_input_stats` must be provided. (default is the value assigned
+      to `inference_type`, must be in {tf.float32, tf.int8, tf.uint8})
+    inference_output_type: Data type of the numeric arrays in the output layer.
+      (default is the value assigned to `inference_type`, must be in
+      {tf.float32, tf.int8, tf.uint8})
+    quantized_input_stats: Map of input tensor names to a tuple of floats
+      representing the mean and standard deviation of the training data.
+      (e.g., {"foo" : (0., 1.)}). Required if `inference_input_type` is tf.int8
+      or tf.uint8. (default None)
+    default_ranges_stats: Tuple of integers (min, max) representing range values
+      for all numeric arrays without a specified range. Intended for
+      experimenting with quantization via "dummy quantization". (default None)
+    allow_custom_ops: Boolean indicating whether to allow custom operations.
+      When False any unknown operation is an error. When True, custom ops are
+      created for any op that is unknown. The developer will need to provide
+      these to the TensorFlow Lite runtime with a custom resolver. (default
+      False)
     drop_control_dependency: Boolean indicating whether to drop control
       dependencies silently. This is due to TFLite not supporting control
       dependencies. (default True)
@@ -1683,37 +1686,25 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
     change_concat_input_ranges: Boolean to change behavior of min/max ranges for
       inputs and outputs of the concat operator for quantized models. Changes
       the ranges of concat operator overlap when true. (default False)
-    allow_custom_ops: Boolean indicating whether to allow custom operations.
-      When false any unknown operation is an error. When true, custom ops are
-      created for any op that is unknown. The developer will need to provide
-      these to the TensorFlow Lite runtime with a custom resolver. (default
-      False)
-    post_training_quantize: Deprecated. Please specify `[Optimize.DEFAULT]` for
-      `optimizations` instead. Boolean indicating whether to quantize the
-      weights of the converted float model. Model size will be reduced and
-      there will be latency improvements (at the cost of accuracy). (default
-      False)
+    output_format: Output file format. (default
+      tf.compat.v1.lite.constants.TFLITE, must be in
+      {tf.compat.v1.lite.constants.TFLITE,
+      tf.compat.v1.lite.constants.GRAPHVIZ_DOT})
     dump_graphviz_dir: Full filepath of folder to dump the graphs at various
       stages of processing GraphViz .dot files. Preferred over
-      --output_format=GRAPHVIZ_DOT in order to keep the requirements of the
-      output file. (default None)
-    dump_graphviz_video: Boolean indicating whether to dump the graph after
-      every graph transformation. (default False)
-    conversion_summary_dir: A string indicating the path to the generated
-      conversion logs.
-    target_ops: Deprecated. Please specify `target_spec.supported_ops` instead.
-      Set of OpsSet options indicating which converter to use. (default
-      set([OpsSet.TFLITE_BUILTINS]))
-    target_spec: Experimental flag, subject to change. Specifications of target
-      device, including supported ops set, supported types and a set of user's
-      defined TensorFlow operators required in the TensorFlow Lite runtime.
-    optimizations: Experimental flag, subject to change. A list of optimizations
-      to apply when converting the model. E.g. `[Optimize.DEFAULT]`
-    representative_dataset: A representative dataset that can be used to
-      generate input and output samples for the model. The converter can use the
-      dataset to evaluate different optimizations.
+      `output_format=tf.compat.v1.lite.constants.GRAPHVIZ_DOT` in order to keep
+      the requirements of the output file. (default None)
+    dump_graphviz_video: Boolean indicating whether to dump the GraphViz .dot
+      files after every graph transformation. Requires the `dump_graphviz_dir`
+      flag to be specified. (default False)
+    conversion_summary_dir: Full path of the directory to store conversion logs.
+      (default None)
+    target_ops: Deprecated. Please use `target_spec.supported_ops` instead.
+    post_training_quantize: Deprecated. Please use `optimizations` instead and
+      set it to `{tf.lite.Optimize.DEFAULT}`. (default False)
     experimental_new_converter: Experimental flag, subject to change. Enables
       MLIR-based conversion instead of TOCO conversion. (default True)

   Example usage:

   ```python
@@ -1911,9 +1902,10 @@ class TFLiteConverter(TFLiteFrozenGraphConverter):
       output_arrays: List of output tensors to freeze graph with. Uses output
         arrays from SignatureDef when none are provided. (default None)
       tag_set: Set of tags identifying the MetaGraphDef within the SavedModel to
-        analyze. All tags in the tag set must be present. (default set("serve"))
+        analyze. All tags in the tag set must be present. (default
+        {tf.saved_model.SERVING})
       signature_key: Key identifying SignatureDef containing inputs and outputs.
-        (default DEFAULT_SERVING_SIGNATURE_DEF_KEY)
+        (default tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY)

     Returns:
       TFLiteConverter class.
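A sketch of the 1.x from_saved_model call these defaults describe (the SavedModel directory is hypothetical):

```python
import tensorflow as tf

converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(
    "/tmp/my_saved_model",
    tag_set={tf.saved_model.SERVING},
    signature_key=tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
tflite_model = converter.convert()
```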
@@ -1239,18 +1239,22 @@ class FromSessionTest(TestModels, parameterized.TestCase):
       quantized_converter.inference_type = quantized_type
       quantized_converter.convert()
     self.assertEqual(
-        'The `quantized_input_stats` flag must be defined when '
-        'either `inference_type` flag or `inference_input_type` '
-        'flag is set to tf.uint8 or tf.int8.', str(error.exception))
+        'The `quantized_input_stats` flag must be defined when either '
+        '`inference_type` flag or `inference_input_type` flag is set to '
+        'tf.int8 or tf.uint8. Currently, `inference_type=tf.{}` and '
+        '`inference_input_type=None`.'.format(quantized_type.name),
+        str(error.exception))

     with self.assertRaises(ValueError) as error:
       quantized_converter.inference_type = dtypes.float32
       quantized_converter.inference_input_type = quantized_type
       quantized_converter.convert()
     self.assertEqual(
-        'The `quantized_input_stats` flag must be defined when '
-        'either `inference_type` flag or `inference_input_type` '
-        'flag is set to tf.uint8 or tf.int8.', str(error.exception))
+        'The `quantized_input_stats` flag must be defined when either '
+        '`inference_type` flag or `inference_input_type` flag is set to '
+        'tf.int8 or tf.uint8. Currently, `inference_type=tf.float32` and '
+        '`inference_input_type=tf.{}`.'.format(quantized_type.name),
+        str(error.exception))

     quantized_converter.inference_type = quantized_type
     quantized_converter.inference_input_type = quantized_type
@@ -127,9 +127,9 @@ def _convert_tflite_enum_type_to_tf_type(tflite_enum_type):
   return tf_type


-def _get_tf_type_name(tf_type):
+def get_tf_type_name(tf_type):
   """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32")."""
-  return "tf." + tf_type.name
+  return "tf." + tf_type.name if tf_type else None


 def get_tensor_name(tensor):
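A standalone sketch of what the renamed helper returns; re-implemented here rather than imported from the internal tensorflow.lite.python.util module:

```python
import tensorflow as tf

def get_tf_type_name(tf_type):
  # Mirrors the utility above: "tf." plus the dtype name, or None for None.
  return "tf." + tf_type.name if tf_type else None

print(get_tf_type_name(tf.float32))  # tf.float32
print(get_tf_type_name(tf.uint8))    # tf.uint8
print(get_tf_type_name(None))        # None
```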
@@ -674,7 +674,7 @@ def _modify_model_input_type(model, inference_input_type=dtypes.float32):
         raise ValueError(
             "Initial model input type must be tf.float32. Expected type for "
             "tensor with name '{}' is tf.float32, instead type is {}".format(
-                float_tensor.name, _get_tf_type_name(float_type)))
+                float_tensor.name, get_tf_type_name(float_type)))
       # If found, validate that the operator output is quantized and compatible
       # with the final model input type
       quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
@@ -683,17 +683,17 @@ def _modify_model_input_type(model, inference_input_type=dtypes.float32):
             "Initial model input is not quantized. Expected type for "
             "tensor with name '{}' should be in {}, instead type is {}".format(
                 quant_tensor.name,
-                tuple(_get_tf_type_name(t) for t in
+                tuple(get_tf_type_name(t) for t in
                       _MAP_QUANT_TO_IO_TYPES.keys()),
-                _get_tf_type_name(quant_type)))
+                get_tf_type_name(quant_type)))
       else:
         inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
         if inference_input_type not in inference_io_types:
           raise ValueError(
               "Unsupported `inference_input_type` value. Expected to be in "
               "{}, instead got {}.".format(
-                  tuple(_get_tf_type_name(t) for t in inference_io_types),
-                  _get_tf_type_name(inference_input_type)))
+                  tuple(get_tf_type_name(t) for t in inference_io_types),
+                  get_tf_type_name(inference_input_type)))
       input_quant_ops.append(op)

   if len(subgraph.inputs) != len(input_quant_ops):
@@ -725,7 +725,7 @@ def _modify_model_input_type(model, inference_input_type=dtypes.float32):
   else:
     raise ValueError(
         "Unsupported `inference_input_type` value {}.".format(
-            _get_tf_type_name(inference_input_type)))
+            get_tf_type_name(inference_input_type)))


 def _modify_model_output_type(model, inference_output_type=dtypes.float32):
@@ -768,7 +768,7 @@ def _modify_model_output_type(model, inference_output_type=dtypes.float32):
         raise ValueError(
             "Initial model output type must be tf.float32. Expected type for "
             "tensor with name '{}' is tf.float32, instead type is {}".format(
-                float_tensor.name, _get_tf_type_name(float_type)))
+                float_tensor.name, get_tf_type_name(float_type)))
       # If found, validate that the operator input is quantized and compatible
       # with the final model output type
       quant_type = _convert_tflite_enum_type_to_tf_type(quant_tensor.type)
@@ -777,17 +777,17 @@ def _modify_model_output_type(model, inference_output_type=dtypes.float32):
             "Initial model output is not dequantized. Expected type for "
             "tensor with name '{}' should be in {}, instead type is {}".format(
                 quant_tensor.name,
-                tuple(_get_tf_type_name(t) for t in
+                tuple(get_tf_type_name(t) for t in
                       _MAP_QUANT_TO_IO_TYPES.keys()),
-                _get_tf_type_name(quant_type)))
+                get_tf_type_name(quant_type)))
       else:
         inference_io_types = _MAP_QUANT_TO_IO_TYPES[quant_type]
         if inference_output_type not in inference_io_types:
           raise ValueError(
               "Unsupported `inference_output_type` value. Expected to be in "
               "{}, instead got {}.".format(
-                  tuple(_get_tf_type_name(t) for t in inference_io_types),
-                  _get_tf_type_name(inference_output_type)))
+                  tuple(get_tf_type_name(t) for t in inference_io_types),
+                  get_tf_type_name(inference_output_type)))
       output_dequant_ops.append(op)

   if len(subgraph.outputs) != len(output_dequant_ops):
@@ -834,7 +834,7 @@ def _modify_model_output_type(model, inference_output_type=dtypes.float32):
   else:
     raise ValueError(
         "Unsupported `inference_output_type` value {}.".format(
-            _get_tf_type_name(inference_output_type)))
+            get_tf_type_name(inference_output_type)))


 def modify_model_io_type(