Add PreprocessingStage (sequential) class.

PiperOrigin-RevId: 295230713 Change-Id: Id2f50b7f3cb0c11e6a59e311d768a9edf9cdc057
2020-02-14 14:42:43 -08:00 · 2020-02-14 14:42:43 -08:00 · 6fc343905a
commit 6fc343905a
parent 79ed5077ce
7 changed files with 251 additions and 20 deletions
--- a/tensorflow/python/keras/engine/base_preprocessing_layer.py
+++ b/tensorflow/python/keras/engine/base_preprocessing_layer.py
@ -22,7 +22,6 @@ import collections
 import numpy as np
 from tensorflow.python.data.experimental.ops import cardinality
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
@ -32,7 +31,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.engine import training_generator
 from tensorflow.python.keras.engine.base_layer import Layer
-from tensorflow.python.ops import math_ops
+from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.util.tf_export import keras_export
@ -123,11 +122,6 @@ class CombinerPreprocessingLayer(PreprocessingLayer):
      data_dict[name] = var.numpy()
    return data_dict
  def _dataset_is_infinite(self, dataset):
    """True if the passed dataset is infinite."""
    return math_ops.equal(
        cardinality.cardinality(dataset), cardinality.INFINITE)
  def _get_dataset_iterator(self, dataset):
    """Gets an iterator from a tf.data.Dataset."""
    return dataset_ops.make_one_shot_iterator(dataset).get_next
@ -148,18 +142,20 @@ class CombinerPreprocessingLayer(PreprocessingLayer):
    else:
      accumulator = self._combiner.restore(self._restore_updates())
-    if not isinstance(data, (dataset_ops.DatasetV2, np.ndarray)):
+    if not isinstance(data,
                      (dataset_ops.DatasetV2, np.ndarray, ops.EagerTensor)):
      raise ValueError(
-          'adapt() requires a Dataset or a Numpy array as input, got {}'.format(
+          '`adapt()` requires a batched Dataset, an EagerTensor, '
-              type(data)))
+          'or a Numpy array as input, '
          'got {}'.format(type(data)))
    if isinstance(data, dataset_ops.DatasetV2):
      # Validate the datasets to try and ensure we haven't been passed one with
      # infinite size. That would cause an infinite loop here.
-      if self._dataset_is_infinite(data):
+      if tf_utils.dataset_is_infinite(data):
        raise ValueError(
-            'The dataset passed to "adapt()" has an infinite number of '
+            'The dataset passed to `adapt()` has an infinite number of '
-            'elements. Please use dataset.take(...) to make the number '
+            'elements. Please use `dataset.take(...)` to make the number '
            'of elements finite.')
      next_data = self._get_dataset_iterator(data)
    else:
--- a/tensorflow/python/keras/engine/base_preprocessing_layer_test.py
+++ b/tensorflow/python/keras/engine/base_preprocessing_layer_test.py
@ -135,7 +135,7 @@ class PreprocessingLayerTest(keras_parameterized.TestCase):
    input_dataset = [1, 2, 3, 4, 5]
    layer = get_layer()
-    with self.assertRaisesRegex(ValueError, ".*a Dataset or a Numpy.*"):
+    with self.assertRaisesRegex(ValueError, "requires a"):
      layer.adapt(input_dataset)
  def test_adapt_infinite_dataset_fails(self):
--- a/tensorflow/python/keras/engine/base_preprocessing_layer_v1.py
+++ b/tensorflow/python/keras/engine/base_preprocessing_layer_v1.py
@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from tensorflow.python.data.experimental.ops import cardinality
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.engine import base_preprocessing_layer
@ -54,11 +53,6 @@ class CombinerPreprocessingLayer(
      data_dict[name] = K.get_session().run(var)
    return data_dict
  def _dataset_is_infinite(self, dataset):
    """True if the passed dataset is infinite."""
    dataset_size = K.get_session().run(cardinality.cardinality(dataset))
    return dataset_size == cardinality.INFINITE
  def _get_dataset_iterator(self, dataset):
    """Gets an iterator from a tf.data.Dataset."""
    iterator = dataset_ops.make_one_shot_iterator(dataset)
--- a/tensorflow/python/keras/layers/preprocessing/BUILD
+++ b/tensorflow/python/keras/layers/preprocessing/BUILD
@ -23,6 +23,7 @@ py_library(
        ":hashing",
        ":image_preprocessing",
        ":normalization",
        ":preprocessing_stage",
        ":preprocessing_test_utils",
        ":reduction",
        ":text_vectorization",
@ -204,6 +205,20 @@ py_library(
    ],
 )
 py_library(
    name = "preprocessing_stage",
    srcs = [
        "preprocessing_stage.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python:framework_ops",
        "//tensorflow/python/data/ops:dataset_ops",
        "//tensorflow/python/keras:engine",
        "//tensorflow/python/keras/engine:base_preprocessing_layer",
    ],
 )
 py_library(
    name = "preprocessing_test_utils",
    srcs = ["preprocessing_test_utils.py"],
@ -344,3 +359,16 @@ tf_py_test(
        "@absl_py//absl/testing:parameterized",
    ],
 )
 tf_py_test(
    name = "preprocessing_stage_test",
    srcs = ["preprocessing_stage_test.py"],
    python_version = "PY3",
    deps = [
        ":preprocessing_stage",
        "//tensorflow/python:client_testlib",
        "//tensorflow/python/keras",
        "//third_party/py/numpy",
        "@absl_py//absl/testing:parameterized",
    ],
 )
--- a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage.py
+++ b/tensorflow/python/keras/layers/preprocessing/preprocessing_stage.py
@ -0,0 +1,96 @@
 # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Preprocessing stage."""
 # pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import numpy as np
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.keras.engine import base_preprocessing_layer
 from tensorflow.python.keras.engine import sequential
 from tensorflow.python.keras.utils import tf_utils
 class PreprocessingStage(base_preprocessing_layer.PreprocessingLayer,
                         sequential.Sequential):
  """A sequential preprocessing stage.
  This preprocessing stage wraps a list of preprocessing layers into a
  Sequential-like object that enables you to `adapt()` the whole list via
  a single `adapt()` call on the preprocessing stage.
  Arguments:
    layers: List of layers. Can include layers that aren't preprocessing layers.
    name: String. Optional name for the preprocessing stage object.
  """
  def adapt(self, data, reset_state=True):
    """Adapt the state of the layers of the preprocessing stage to the data.
    Arguments:
      data: A batched Dataset object, or a NumPy array, or an EagerTensor.
        Data to be iterated over to adapt the state of the layers in this
        preprocessing stage.
      reset_state: Whether this call to `adapt` should reset the state of
        the layers in this preprocessing stage.
    """
    if not isinstance(data,
                      (dataset_ops.DatasetV2, np.ndarray, ops.EagerTensor)):
      raise ValueError(
          '`adapt()` requires a batched Dataset, an EagerTensor, '
          'or a Numpy array as input, '
          'got {}'.format(type(data)))
    if isinstance(data, dataset_ops.DatasetV2):
      # Validate the datasets to try and ensure we haven't been passed one with
      # infinite size. That would cause an infinite loop here.
      if tf_utils.dataset_is_infinite(data):
        raise ValueError(
            'The dataset passed to `adapt()` has an infinite number of '
            'elements. Please use dataset.take(...) to make the number '
            'of elements finite.')
    for current_layer_index in range(0, len(self.layers)):
      if not hasattr(self.layers[current_layer_index], 'adapt'):
        # Skip any layer that does not need adapting.
        continue
      def map_fn(x):
        """Maps `PreprocessingStage` inputs to inputs at `current_layer_index`.
        Args:
          x: Batch of inputs seen in entry of the `PreprocessingStage` instance.
        Returns:
          Batch of inputs to be processed by layer
            `self.layers[current_layer_index]`
        """
        if current_layer_index == 0:  # pylint: disable=cell-var-from-loop
          return x
        for i in range(current_layer_index):  # pylint: disable=cell-var-from-loop
          x = self.layers[i](x)
        return x
      if isinstance(data, dataset_ops.DatasetV2):
        current_layer_data = data.map(map_fn)
      else:
        current_layer_data = map_fn(data)
      self.layers[current_layer_index].adapt(current_layer_data,
                                             reset_state=reset_state)
--- a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_test.py
+++ b/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_test.py
@ -0,0 +1,105 @@
 # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Preprocessing stage tests."""
 # pylint: disable=g-classes-have-attributes
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import time
 import numpy as np
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras.engine import base_preprocessing_layer
 from tensorflow.python.keras.layers import convolutional
 from tensorflow.python.keras.layers.preprocessing import image_preprocessing
 from tensorflow.python.keras.layers.preprocessing import normalization
 from tensorflow.python.keras.layers.preprocessing import preprocessing_stage
 from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
 class PreprocessingStageTest(
    keras_parameterized.TestCase,
    preprocessing_test_utils.PreprocessingLayerTest):
  def test_adapt(self):
    class PL(base_preprocessing_layer.PreprocessingLayer):
      def __init__(self, **kwargs):
        self.adapt_time = None
        self.adapt_count = 0
        super(PL, self).__init__(**kwargs)
      def adapt(self, data, reset_state=True):
        self.adapt_time = time.time()
        self.adapt_count += 1
      def call(self, inputs):
        return inputs + 1.
    # Test with NumPy array
    stage = preprocessing_stage.PreprocessingStage([
        PL(),
        PL(),
        PL(),
    ])
    stage.adapt(np.ones((3, 4)))
    self.assertEqual(stage.layers[0].adapt_count, 1)
    self.assertEqual(stage.layers[1].adapt_count, 1)
    self.assertEqual(stage.layers[2].adapt_count, 1)
    self.assertLess(stage.layers[0].adapt_time, stage.layers[1].adapt_time)
    self.assertLess(stage.layers[1].adapt_time, stage.layers[2].adapt_time)
    # Check call
    y = stage(array_ops.ones((3, 4)))
    self.assertAllClose(y, np.ones((3, 4)) + 3.)
    # Test with dataset
    adapt_data = dataset_ops.Dataset.from_tensor_slices(np.ones((3, 10)))
    adapt_data = adapt_data.batch(2)  # 5 batches of 2 samples
    stage.adapt(adapt_data)
    self.assertEqual(stage.layers[0].adapt_count, 2)
    self.assertEqual(stage.layers[1].adapt_count, 2)
    self.assertEqual(stage.layers[2].adapt_count, 2)
    self.assertLess(stage.layers[0].adapt_time, stage.layers[1].adapt_time)
    self.assertLess(stage.layers[1].adapt_time, stage.layers[2].adapt_time)
    # Test error with bad data
    with self.assertRaisesRegex(ValueError, 'requires a '):
      stage.adapt(None)
  def test_mixing_preprocessing_and_regular_layers(self):
    stage = preprocessing_stage.PreprocessingStage([
        image_preprocessing.CenterCrop(16, 16),
        normalization.Normalization(),
        convolutional.Conv2D(4, 3)
    ])
    data = np.ones((16, 20, 20, 3), dtype='float32')
    stage.adapt(data)
    _ = stage(data)
    stage.compile('rmsprop', 'mse')
    stage.fit(data, np.ones((16, 14, 14, 4)))
    _ = stage.evaluate(data, np.ones((16, 14, 14, 4)))
    _ = stage.predict(data)
 if __name__ == '__main__':
  test.main()
--- a/tensorflow/python/keras/utils/tf_utils.py
+++ b/tensorflow/python/keras/utils/tf_utils.py
@ -19,6 +19,7 @@ from __future__ import print_function
 import six
 from tensorflow.python.data.experimental.ops import cardinality
 from tensorflow.python.eager import context
 from tensorflow.python.framework import composite_tensor
 from tensorflow.python.framework import ops
@ -29,6 +30,7 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.framework import type_spec
 from tensorflow.python.keras import backend as K
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.util import nest
 from tensorflow.python.util import object_identity
@ -452,3 +454,13 @@ def graph_context_for_symbolic_tensors(*args, **kwargs):
      yield
  else:
    yield
 def dataset_is_infinite(dataset):
  """True if the passed dataset is infinite."""
  if ops.executing_eagerly_outside_functions():
    return math_ops.equal(
        cardinality.cardinality(dataset), cardinality.INFINITE)
  else:
    dataset_size = K.get_session().run(cardinality.cardinality(dataset))
    return dataset_size == cardinality.INFINITE