Converter API change to enable optimization with sparsity.

To enable optimization with sparsity, use:
converter.optimizations = [tf.lite.Optimize.SPARSITY]
converter.convert()

Note:
1) This feature is experimental
2) It requires the use of during-training pruning to be effective.
3) Not all kernels have been optimized for sparse execution, so the initial benefit will primarily be in the model size on disk.
PiperOrigin-RevId: 351245576
Change-Id: I2a771f2a5ead92bdf93821af9f8058b0957b5aef
This commit is contained in:
Yunlu Li 2021-01-11 15:13:05 -08:00 committed by TensorFlower Gardener
parent 022122691d
commit a0e249979f
5 changed files with 53 additions and 7 deletions

View File

@ -78,7 +78,8 @@
* TFLiteConverter exports models with SignatureDef
* Interpreter supports getting a list of signatures and getting callable
function for a given SignatureDef.
* Add int8 support for `ReshapeV2`.
* Add int8 support for `ReshapeV2`.
* Add experimental support for optimization with sparsity.
* TF Core:
* Corrected higher-order gradients of control flow constructs (`tf.cond`,
`tf.while_loop`, and compositions like `tf.foldl`) computed with

View File

@ -104,6 +104,18 @@ class Optimize(enum.Enum):
OPTIMIZE_FOR_LATENCY
Deprecated. Does the same as DEFAULT.
EXPERIMENTAL_SPARSITY
Experimental flag, subject to change.
Enable optimization by taking advantage of the sparse model weights
trained with pruning.
The converter will inspect the sparsity pattern of the model weights and
do its best to improve size and latency.
The flag can be used alone to optimize float32 models with sparse weights.
It can also be used together with the DEFAULT optimization mode to
optimize quantized models with sparse weights.
"""
# Default optimization strategy that quantizes model weights. Enhanced
@ -119,6 +131,18 @@ class Optimize(enum.Enum):
# Deprecated. Does the same as DEFAULT.
OPTIMIZE_FOR_LATENCY = "OPTIMIZE_FOR_LATENCY"
# Experimental flag, subject to change.
# Enable optimization by taking advantage of the sparse model weights trained
# with pruning.
#
# The converter will inspect the sparsity pattern of the model weights and do
# its best to improve size and latency.
# The flag can be used alone to optimize float32 models with sparse weights.
# It can also be used together with the DEFAULT optimization mode to optimize
# quantized models with sparse weights.
# TODO(b/161560631): Add log message when this optimization is applied.
EXPERIMENTAL_SPARSITY = "EXPERIMENTAL_SPARSITY"
def __str__(self):
    """Return the enum member's underlying string value (e.g. "DEFAULT")."""
    return str(self.value)
@ -538,6 +562,9 @@ class TFLiteConverterBase(object):
raise ValueError("SavedModel file format({0}) is not supported".format(
self._saved_model_version))
def _sparsify_model(self):
    """Returns True if the user requested the sparsity optimization.

    Sparsification is enabled by adding Optimize.EXPERIMENTAL_SPARSITY to
    the converter's `optimizations` collection.
    """
    requested = self.optimizations
    return Optimize.EXPERIMENTAL_SPARSITY in requested
class TFLiteConverterBaseV2(TFLiteConverterBase):
"""Converter subclass to share functionality between V2 converters."""
@ -643,7 +670,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
if flags_modify_model_io_type:
result = _modify_model_io_type(result, **flags_modify_model_io_type)
if self._experimental_sparsify_model:
if self._sparsify_model():
result = _mlir_sparsify(result)
return result
@ -752,7 +779,7 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2):
if flags_modify_model_io_type:
result = _modify_model_io_type(result, **flags_modify_model_io_type)
if self._experimental_sparsify_model:
if self._sparsify_model():
result = _mlir_sparsify(result)
return result
@ -1337,7 +1364,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
if flags_modify_model_io_type:
result = _modify_model_io_type(result, **flags_modify_model_io_type)
if self._experimental_sparsify_model:
if self._sparsify_model():
result = _mlir_sparsify(result)
return result

View File

@ -2622,11 +2622,21 @@ class FromKerasFile(TestModels, parameterized.TestCase):
converter.convert()
self.assertValidDebugInfo(converter._debug_info)
def testExperimentalSparsifyModel(self):
def testSparsifyModel(self):
self._getSequentialModel()
converter = lite.TocoConverter.from_keras_model_file(self._keras_file)
converter._experimental_sparsify_model = True
converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file)
converter.optimizations = {lite.Optimize.EXPERIMENTAL_SPARSITY}
tflite_model = converter.convert()
self.assertTrue(tflite_model)
def testSparsifyQuantizedModel(self):
self._getSequentialModel()
converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file)
converter.optimizations = {
lite.Optimize.DEFAULT, lite.Optimize.EXPERIMENTAL_SPARSITY
}
tflite_model = converter.convert()
self.assertIsNotNone(tflite_model)

View File

@ -5,6 +5,10 @@ tf_class {
name: "DEFAULT"
mtype: "<enum \'Optimize\'>"
}
member {
name: "EXPERIMENTAL_SPARSITY"
mtype: "<enum \'Optimize\'>"
}
member {
name: "OPTIMIZE_FOR_LATENCY"
mtype: "<enum \'Optimize\'>"

View File

@ -5,6 +5,10 @@ tf_class {
name: "DEFAULT"
mtype: "<enum \'Optimize\'>"
}
member {
name: "EXPERIMENTAL_SPARSITY"
mtype: "<enum \'Optimize\'>"
}
member {
name: "OPTIMIZE_FOR_LATENCY"
mtype: "<enum \'Optimize\'>"