Converter API change to enable optimization with sparsity.

To enable optimization with sparsity, use:
converter.optimizations = [tf.lite.Optimize.SPARSITY]
converter.convert()

Note:
1) This feature is experimental
2) It requires the use of during-training pruning to be effective.
3) Not all kernels have been optimized for sparse execution, so the initial benefit will primarily be in the model size on disk.
PiperOrigin-RevId: 351245576
Change-Id: I2a771f2a5ead92bdf93821af9f8058b0957b5aef
This commit is contained in:
Yunlu Li 2021-01-11 15:13:05 -08:00 committed by TensorFlower Gardener
parent 022122691d
commit a0e249979f
5 changed files with 53 additions and 7 deletions

View File

@ -78,7 +78,8 @@
* TFLiteConverter exports models with SignatureDef
* Interpreter supports getting a list of signatures and getting callable
function for a given SignatureDef.
* Add int8 support for `ReshapeV2`.
* Add int8 support for `ReshapeV2`.
* Add experimental support for optimization with sparsity.
* TF Core:
* Corrected higher-order gradients of control flow constructs (`tf.cond`,
`tf.while_loop`, and compositions like `tf.foldl`) computed with

View File

@ -104,6 +104,18 @@ class Optimize(enum.Enum):
OPTIMIZE_FOR_LATENCY
Deprecated. Does the same as DEFAULT.
EXPERIMENTAL_SPARSITY
Experimental flag, subject to change.
Enable optimization by taking advantage of the sparse model weights
trained with pruning.
The converter will inspect the sparsity pattern of the model weights and
do its best to improve size and latency.
The flag can be used alone to optimize float32 models with sparse weights.
It can also be used together with the DEFAULT optimization mode to
optimize quantized models with sparse weights.
"""
# Default optimization strategy that quantizes model weights. Enhanced
@ -119,6 +131,18 @@ class Optimize(enum.Enum):
# Deprecated. Does the same as DEFAULT.
OPTIMIZE_FOR_LATENCY = "OPTIMIZE_FOR_LATENCY"
# Experimental flag, subject to change.
# Enable optimization by taking advantage of the sparse model weights trained
# with pruning.
#
# The converter will inspect the sparsity pattern of the model weights and do
# its best to improve size and latency.
# The flag can be used alone to optimize float32 models with sparse weights.
# It can also be used together with the DEFAULT optimization mode to optimize
# quantized models with sparse weights.
# TODO(b/161560631): Add log message when this optimization is applied.
EXPERIMENTAL_SPARSITY = "EXPERIMENTAL_SPARSITY"
def __str__(self):
    """Return the enum member's underlying string value (e.g. "DEFAULT")."""
    return str(self.value)
@ -538,6 +562,9 @@ class TFLiteConverterBase(object):
raise ValueError("SavedModel file format({0}) is not supported".format(
self._saved_model_version))
def _sparsify_model(self):
    """Returns True if the user requested the sparsity optimization.

    Sparsification is enabled by adding Optimize.EXPERIMENTAL_SPARSITY to
    the converter's `optimizations` collection.
    """
    requested = self.optimizations
    return Optimize.EXPERIMENTAL_SPARSITY in requested
class TFLiteConverterBaseV2(TFLiteConverterBase):
"""Converter subclass to share functionality between V2 converters."""
@ -643,7 +670,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
if flags_modify_model_io_type:
result = _modify_model_io_type(result, **flags_modify_model_io_type)
if self._experimental_sparsify_model:
if self._sparsify_model():
result = _mlir_sparsify(result)
return result
@ -752,7 +779,7 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2):
if flags_modify_model_io_type:
result = _modify_model_io_type(result, **flags_modify_model_io_type)
if self._experimental_sparsify_model:
if self._sparsify_model():
result = _mlir_sparsify(result)
return result
@ -1337,7 +1364,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
if flags_modify_model_io_type:
result = _modify_model_io_type(result, **flags_modify_model_io_type)
if self._experimental_sparsify_model:
if self._sparsify_model():
result = _mlir_sparsify(result)
return result

View File

@ -2622,11 +2622,21 @@ class FromKerasFile(TestModels, parameterized.TestCase):
converter.convert()
self.assertValidDebugInfo(converter._debug_info)
def testExperimentalSparsifyModel(self):
def testSparsifyModel(self):
self._getSequentialModel()
converter = lite.TocoConverter.from_keras_model_file(self._keras_file)
converter._experimental_sparsify_model = True
converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file)
converter.optimizations = {lite.Optimize.EXPERIMENTAL_SPARSITY}
tflite_model = converter.convert()
self.assertTrue(tflite_model)
def testSparsifyQuantizedModel(self):
self._getSequentialModel()
converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file)
converter.optimizations = {
lite.Optimize.DEFAULT, lite.Optimize.EXPERIMENTAL_SPARSITY
}
tflite_model = converter.convert()
self.assertIsNotNone(tflite_model)

View File

@ -5,6 +5,10 @@ tf_class {
name: "DEFAULT"
mtype: "<enum \'Optimize\'>"
}
member {
name: "EXPERIMENTAL_SPARSITY"
mtype: "<enum \'Optimize\'>"
}
member {
name: "OPTIMIZE_FOR_LATENCY"
mtype: "<enum \'Optimize\'>"

View File

@ -5,6 +5,10 @@ tf_class {
name: "DEFAULT"
mtype: "<enum \'Optimize\'>"
}
member {
name: "EXPERIMENTAL_SPARSITY"
mtype: "<enum \'Optimize\'>"
}
member {
name: "OPTIMIZE_FOR_LATENCY"
mtype: "<enum \'Optimize\'>"