Removes two Keras fallbacks to older v1 code:

1. When calling `predict` without compiling first.
2. When using a `tf.compat.v1.optimizers.Optimizer` (these work eagerly and
   should work in the v2 loops).

This revealed some missing coverage in the tests and a few bugs. The fixes for
these issues are:

1. Int sample weights used with masks now work correctly (without type
   crashes).
2. Keras in TF2 explicitly disallows using sparse SciPy matrices as inputs to
   dense functional model inputs. A meaningful error is now raised, whereas
   before this crashed with unclear messages. In v1 the SciPy matrices were
   silently cast to dense arrays, but this causes ambiguity for subclass
   models and can't be done without too much extra complexity in the data
   adapters.
3. This CL marks a few tests that depend on v2 dtype behavior as run_v2_only,
   and changes a few tests to v1_deprecated.

PiperOrigin-RevId: 276388163
Change-Id: I0c31e4e6aa16793f119f6f0184a93b294db3d030
Parent: e0b4792e7b
Commit: 268229dfed
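For context, the first removed fallback concerns `predict` on an uncompiled
model. A minimal sketch (mine, not part of the commit) of the behavior this
targets, assuming stock `tf.keras` APIs:

import numpy as np
import tensorflow as tf

# An uncompiled model: in TF2, predict should run through the v2 execution
# path directly rather than falling back to the older v1 loops.
model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
preds = model.predict(np.random.random((10, 3)))  # no compile() needed
print(preds.shape)  # (10, 4)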
@@ -259,8 +259,7 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
       dataset = keras_test_lib.get_dataset(distribution)
 
       exception_error_message = (
-          '`validation_split` argument is not supported when input `x`'
-          ' is a dataset or a dataset iterator.+')
+          '`validation_split` argument is not supported when ')
 
       # Test with validation split
       with self.assertRaisesRegexp(ValueError, exception_error_message):
@@ -415,6 +415,9 @@ class GenericArrayLikeDataAdapter(TensorLikeDataAdapter):
   as Numpy, but it ignores any case where all the inputs are Tensors or Numpy
   arrays (because that case is handled by the base TensorLikeDataAdapter).
 
+  It ignores scipy sparse matrices and Composite Tensors because those are
+  handled by the CompositeTensorDataAdapter.
+
   It also does not handle lists/tuples of scalars, because those are handled
   by the ListsOfScalarsDataAdapter.
   """
@@ -434,7 +437,8 @@ class GenericArrayLikeDataAdapter(TensorLikeDataAdapter):
         hasattr(v, "__len__")
     )
 
-    if not TensorLikeDataAdapter.can_handle(x, y):
+    if (not TensorLikeDataAdapter.can_handle(x, y) and
+        not CompositeTensorDataAdapter.can_handle(x, y)):
       return all(_is_array_like(v) for v in flat_inputs)
     else:
       return False
@@ -155,7 +155,8 @@ class Model(network.Network):
     self._compile_distribution = False
 
     self._run_eagerly = None
-    self._experimental_run_tf_function = False
+    self._experimental_run_tf_function = (
+        ops.executing_eagerly_outside_functions())
 
   def get_weights(self):
     """Retrieves the weights of the model.
@@ -308,11 +309,18 @@ class Model(network.Network):
                          'Session arguments: %s' % (self._function_kwargs,))
 
     self._set_optimizer(optimizer)
-    is_any_optimizer_v1 = any(isinstance(opt, optimizers.Optimizer)
-                              for opt in nest.flatten(self.optimizer))
+    is_any_keras_optimizer_v1 = any(
+        (isinstance(opt, optimizers.Optimizer)
+         and not isinstance(opt, optimizers.TFOptimizer)
+        ) for opt in nest.flatten(self.optimizer))
+
+    if is_any_keras_optimizer_v1 and ops.executing_eagerly_outside_functions():
+      raise ValueError('`tf.compat.v1.keras` Optimizer (', optimizer, ') is '
+                       'not supported when eager execution is enabled. Use a '
+                       '`tf.keras` Optimizer instead, or disable eager '
+                       'execution.')
 
     if ((target_tensors is not None)
-        or is_any_optimizer_v1
         or not ops.executing_eagerly_outside_functions()):
       # Fallback out of things that aren't supported with v2 loops
       self._experimental_run_tf_function = False
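Illustrating the new guard above: compiling with a v1 Keras optimizer while
eager execution is enabled now raises instead of silently falling back. A
hedged sketch, assuming the post-commit behavior:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(3,))])
try:
  # tf.compat.v1.keras optimizers are the `optimizers.Optimizer` subclasses
  # that the is_any_keras_optimizer_v1 check above flags.
  model.compile(tf.compat.v1.keras.optimizers.SGD(), 'mse')
except ValueError as e:
  print(e)  # '`tf.compat.v1.keras` Optimizer ... is not supported ...'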
@@ -3302,6 +3310,11 @@ def _convert_scipy_sparse_tensor(value, expected_input):
   """
   if issparse is not None and issparse(value):
     if ops.is_dense_tensor_like(expected_input):
+      if ops.executing_eagerly_outside_functions():
+        # In TF2 we do not silently densify sparse matrices.
+        raise ValueError('A SciPy sparse matrix was passed to a model '
+                         'that expects dense inputs. Please densify your '
+                         'inputs first, such as by calling `x.toarray().')
       return value.toarray()
     else:
       sparse_coo = value.tocoo()
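From the user's side, the new error and its workaround look roughly like this
(a sketch, assuming the post-commit behavior described in the commit message):

import scipy.sparse
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(3,))])
x = scipy.sparse.random(6, 3, density=0.25).tocsr()

try:
  model.predict(x, batch_size=2)  # dense input expected: raises in TF2
except ValueError as e:
  print(e)  # '... Please densify your inputs first ...'

preds = model.predict(x.toarray(), batch_size=2)  # densify explicitly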
@@ -157,6 +157,7 @@ def _model_loss(model,
             weights = mask
           else:
             # Update dimensions of weights to match with mask if possible.
+            weights = math_ops.cast(weights, outs[i].dtype)
             mask, _, weights = (
                 tf_losses_utils.squeeze_or_expand_dimensions(
                     mask, sample_weight=weights))
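The added cast is the fix for integer sample weights used together with masks
(fix 1 in the commit message): combining a float mask with int weights used to
crash with a dtype error. A minimal sketch of the now-working scenario,
assuming stock `tf.keras` APIs:

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Masking(mask_value=0., input_shape=(2, 1)),
    tf.keras.layers.Dense(1),
])
model.compile('rmsprop', 'mse')

x = np.random.random((4, 2, 1))
y = np.random.random((4, 2, 1))
# Integer sample weights are now cast to the output dtype before being
# combined with the (float) mask.
model.fit(x, y, sample_weight=np.ones((4,), dtype=np.int64), epochs=1)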
@@ -644,7 +644,28 @@ class TrainingTest(keras_parameterized.TestCase):
     input_a_np = np.random.random((10, 3))
     input_b_np = np.random.random((10, 4))
 
-    model.fit([np.ndarray.tolist(input_a_np)], [np.ndarray.tolist(input_b_np)],
-              epochs=2,
-              batch_size=5,
-              verbose=2)
+    # Test execution on inputs that are lists of scalars.
+    # TF2 and TF1 have slightly different semantics:
+    if (testing_utils.should_run_tf_function()
+        or testing_utils.should_run_eagerly()):
+      # In TF2 to avoid any ambiguity when there are nested lists
+      # the entire input gets converted to a
+      # single numpy array (& it only works in the case of a single io model)
+      model.fit(np.ndarray.tolist(input_a_np),
+                np.ndarray.tolist(input_b_np),
+                epochs=2,
+                batch_size=5,
+                verbose=2)
+    else:
+      # In TF1 there was logic to try disambiguating between the individual
+      # inputs when lists are nested. This allowed multi-io functional models
+      # to support lists of scalars as input, but it caused ambiguity issues
+      # for subclass models & made it trickier to pass multi-dimensional inputs
+      # as lists of scalars to single io models. This was an excessive amount
+      # of complexity for what boiled down to a convenience method we were
+      # mainly just using for writing tests.
+      model.fit([np.ndarray.tolist(input_a_np)],
+                [np.ndarray.tolist(input_b_np)],
+                epochs=2,
+                batch_size=5,
+                verbose=2)
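The TF2 branch above boils down to: nested lists of scalars are converted to a
single numpy array up front, which only works for single-input/single-output
models. A sketch of the supported TF2 form, assuming stock `tf.keras` APIs:

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
model.compile('rmsprop', 'mse')

x = np.random.random((10, 3))
y = np.random.random((10, 4))
# Plain (unwrapped) lists of scalars: converted to one array internally.
model.fit(np.ndarray.tolist(x), np.ndarray.tolist(y), epochs=1, batch_size=5)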
@@ -837,12 +858,43 @@ class TrainingTest(keras_parameterized.TestCase):
     model = MyModel()
     self.assertIn('{"a": {}}', model.to_json())
 
-  @keras_parameterized.run_all_keras_modes
+  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
   def test_training_on_sparse_data_with_dense_placeholders(self):
-    # TODO(kaftan) Test seems to not work, file ticket
-    if testing_utils.should_run_eagerly() and context.executing_eagerly():
-      self.skipTest('Skipping running model eagerly.')
+    if scipy_sparse is None:
+      return
+
+    test_inputs = [
+        scipy_sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)
+    ]
+    test_outputs = [
+        scipy_sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)
+    ]
+    in1 = keras.layers.Input(shape=(3,))
+    in2 = keras.layers.Input(shape=(3,))
+    out1 = keras.layers.Dropout(0.5, name='dropout')(in1)
+    out2 = keras.layers.Dense(4, name='dense_1')(in2)
+    model = keras.Model([in1, in2], [out1, out2])
+    model.experimental_run_tf_function = testing_utils.should_run_tf_function()
+
+    with self.assertRaisesRegexp(ValueError, 'Please densify'):
+      model.predict(test_inputs, batch_size=2)
+    optimizer = 'rmsprop'
+    model.compile(
+        optimizer,
+        'mse',
+        metrics=['mae', metrics_module.CategoricalAccuracy()],
+        run_eagerly=testing_utils.should_run_eagerly(),
+        experimental_run_tf_function=testing_utils.should_run_tf_function())
+
+    with self.assertRaisesRegexp(ValueError, 'Please densify'):
+      model.fit(test_inputs, test_outputs,
+                epochs=1, batch_size=2)
+
+    with self.assertRaisesRegexp(ValueError, 'Please densify'):
+      model.evaluate(test_inputs, test_outputs, batch_size=2)
+
+  @tf_test_util.run_deprecated_v1
+  def test_training_on_sparse_data_with_dense_placeholders_v1(self):
     if scipy_sparse is None:
       return
@@ -858,23 +910,17 @@ class TrainingTest(keras_parameterized.TestCase):
     out2 = keras.layers.Dense(4, name='dense_1')(in2)
     model = keras.Model([in1, in2], [out1, out2])
     model.predict(test_inputs, batch_size=2)
-    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    optimizer = 'rmsprop'
     model.compile(
         optimizer,
         'mse',
-        metrics=['mae', metrics_module.CategoricalAccuracy()],
-        run_eagerly=testing_utils.should_run_eagerly(),
-        experimental_run_tf_function=testing_utils.should_run_tf_function())
+        metrics=['mae', metrics_module.CategoricalAccuracy()])
     model.fit(test_inputs, test_outputs,
               epochs=1, batch_size=2, validation_split=0.5)
     model.evaluate(test_inputs, test_outputs, batch_size=2)
 
   @keras_parameterized.run_all_keras_modes
   def test_compile_with_sparse_placeholders(self):
-    # TODO(kaftan) Test seems to not work, file ticket
-    if testing_utils.should_run_eagerly() and context.executing_eagerly():
-      self.skipTest('Skipping running model eagerly.')
-
     input_layer = keras.layers.Input(shape=(10,), sparse=True)
     weights = variables_lib.Variable(
         np.ones((10, 1)).astype(np.float32), name='weights')
@@ -883,7 +929,7 @@ class TrainingTest(keras_parameterized.TestCase):
     model = keras.Model([input_layer], output_layer)
     model.compile(
         loss='binary_crossentropy',
-        optimizer=keras.optimizers.Adam(lr=0.0001),
+        optimizer='adam',
         metrics=['accuracy'],
         run_eagerly=testing_utils.should_run_eagerly(),
         experimental_run_tf_function=testing_utils.should_run_tf_function())
@@ -2432,12 +2478,8 @@ class TestDynamicTrainability(keras_parameterized.TestCase):
 
 class TestTrainingWithDataTensors(keras_parameterized.TestCase):
 
-  @keras_parameterized.run_all_keras_modes
+  @tf_test_util.run_deprecated_v1
   def test_training_and_eval_methods_on_symbolic_tensors_single_io(self):
-    # TODO(kaftan) Test seems to not work, file ticket
-    if context.executing_eagerly():
-      self.skipTest('Skipping eager execution.')
-
     x = keras.layers.Input(shape=(3,), name='input')
     y = keras.layers.Dense(4, name='dense')(x)
     model = keras.Model(x, y)
@@ -2447,9 +2489,7 @@ class TestTrainingWithDataTensors(keras_parameterized.TestCase):
     model.compile(
         optimizer,
         loss,
-        metrics=['mae', metrics_module.CategoricalAccuracy()],
-        run_eagerly=testing_utils.should_run_eagerly(),
-        experimental_run_tf_function=testing_utils.should_run_tf_function())
+        metrics=['mae', metrics_module.CategoricalAccuracy()])
 
     inputs = keras.backend.zeros(shape=(10, 3))
     targets = keras.backend.zeros(shape=(10, 4))
@@ -2478,12 +2518,8 @@ class TestTrainingWithDataTensors(keras_parameterized.TestCase):
               epochs=1, steps_per_epoch=2, verbose=0,
               validation_data=(inputs, targets), validation_steps=2)
 
-  @keras_parameterized.run_all_keras_modes
+  @tf_test_util.run_deprecated_v1
   def test_training_and_eval_methods_on_symbolic_tensors_multi_io(self):
-    # TODO(kaftan) Test seems to not work, file ticket
-    if context.executing_eagerly():
-      self.skipTest('Skipping eager execution.')
-
     a = keras.layers.Input(shape=(3,), name='input_a')
     b = keras.layers.Input(shape=(3,), name='input_b')
 
@@ -2501,9 +2537,7 @@ class TestTrainingWithDataTensors(keras_parameterized.TestCase):
         optimizer,
         loss,
         metrics=['mae', metrics_module.CategoricalAccuracy()],
-        loss_weights=loss_weights,
-        run_eagerly=testing_utils.should_run_eagerly(),
-        experimental_run_tf_function=testing_utils.should_run_tf_function())
+        loss_weights=loss_weights)
 
     input_a_tf = keras.backend.zeros(shape=(10, 3))
     input_b_tf = keras.backend.zeros(shape=(10, 3))
@@ -2717,6 +2751,7 @@ class TestTrainingWithDataTensors(keras_parameterized.TestCase):
     out = model.predict(None, steps=3)
     self.assertEqual(out.shape, (10 * 3, 4))
 
+  @keras_parameterized.run_all_keras_modes
   def test_model_with_partial_loss(self):
     with self.cached_session():
       a = keras.Input(shape=(3,), name='input_a')
@@ -2884,6 +2919,7 @@ class TestTrainingWithDataTensors(keras_parameterized.TestCase):
       self.assertEqual(out[0].shape, (10 * 3, 4))
       self.assertEqual(out[1].shape, (10 * 3, 4))
 
+  @keras_parameterized.run_all_keras_modes
   def test_target_tensors(self):
     with self.cached_session():
       # single-output, as list
@@ -3203,9 +3239,6 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase):
 
   @keras_parameterized.run_all_keras_modes
   def test_metrics_masking(self):
-    if testing_utils.should_run_eagerly():
-      self.skipTest('b/120495761')
     with self.cached_session():
       np.random.seed(1337)
       model = keras.models.Sequential()
       model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1)))
@@ -1063,6 +1063,7 @@ def call_metric_function(metric_fn,
       weights = mask
     else:
       # Update dimensions of weights to match with mask.
+      weights = math_ops.cast(weights, dtype=y_pred.dtype)
       mask, _, weights = tf_losses_utils.squeeze_or_expand_dimensions(
           mask, sample_weight=weights)
       weights *= mask
@@ -360,7 +360,7 @@ def predict_on_batch(model, x):
 
   # If `model._distribution_strategy` is True, then we are in a replica context
   # at this point.
-  inputs = training_utils.cast_if_floating_dtype(inputs)
+  inputs = training_utils.cast_to_model_input_dtypes(inputs, model)
   if isinstance(inputs, collections.Sequence):
     # Unwrap lists with only one input, as we do when training on batch
    if len(inputs) == 1:
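The swap above replaces a blanket floating-point cast with a cast to each model
input's declared dtype, which keeps e.g. float64 inputs intact. A hypothetical
reconstruction of what such a helper might look like (`nest` and `math_ops` are
the internal TF modules already used nearby; this is not the verbatim source):

from tensorflow.python.ops import math_ops
from tensorflow.python.util import nest

def cast_to_model_input_dtypes_sketch(x, model):
  """Casts input data to the dtypes of the corresponding model inputs."""
  input_dtypes = nest.map_structure(lambda t: t.dtype, model.inputs)
  return nest.map_structure(math_ops.cast, x, input_dtypes)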
@@ -44,6 +44,7 @@ class GRULayerTest(keras_parameterized.TestCase):
          'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
+  @tf_test_util.run_v2_only
   def test_float64_GRU(self):
     num_samples = 2
     timesteps = 3
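`run_v2_only` gates tests that rely on v2 dtype behavior (point 3 of the
commit message). A minimal sketch of the kind of float64 coverage involved,
assuming stock `tf.keras` layers:

import numpy as np
import tensorflow as tf

# Under v2 dtype semantics a layer constructed as float64 stays float64.
layer = tf.keras.layers.GRU(4, dtype='float64', return_sequences=True)
out = layer(np.random.random((2, 3, 5)))  # numpy input is float64 by default
print(out.dtype)  # float64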
@@ -343,6 +343,7 @@ class GRUV2Test(keras_parameterized.TestCase):
          'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
+  @test_util.run_v2_only
   def test_float64_GRU(self):
     num_samples = 2
     timesteps = 3
@@ -23,6 +23,7 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
+from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
@@ -44,6 +45,7 @@ class LSTMLayerTest(keras_parameterized.TestCase):
          'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
+  @tf_test_util.run_v2_only
   def test_float64_LSTM(self):
     num_samples = 2
     timesteps = 3
@@ -570,6 +570,7 @@ class LSTMV2Test(keras_parameterized.TestCase):
          },
         input_shape=(num_samples, timesteps, embedding_dim))
 
+  @test_util.run_v2_only
   def test_float64_LSTM(self):
     num_samples = 2
     timesteps = 3
@@ -1477,7 +1477,7 @@ class RNNTest(keras_parameterized.TestCase):
           input_layer, initial_state=initial_states)
       model = keras.Model(input_layer, rnn_output)
       model.compile(
-          optimizer=keras.optimizers.RMSprop(), loss='mse',
+          optimizer='rmsprop', loss='mse',
           run_eagerly=testing_utils.should_run_eagerly(),
           experimental_run_tf_function=testing_utils.should_run_tf_function())
       return model
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
+from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
@@ -42,6 +43,7 @@ class SimpleRNNLayerTest(keras_parameterized.TestCase):
          'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
+  @tf_test_util.run_v2_only
   def test_float64_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
@@ -376,6 +376,8 @@ class TimeDistributedTest(keras_parameterized.TestCase):
           layer=[keras.layers.LSTM,
                  keras.layers.Dense]))
   def test_TimeDistributed_with_ragged_input(self, layer):
+    if testing_utils.should_run_tf_function():
+      self.skipTest('b/143103634')
     np.random.seed(100)
     layer = layer(4)
     ragged_data = ragged_factory_ops.constant(
@@ -387,6 +389,9 @@ class TimeDistributedTest(keras_parameterized.TestCase):
     x_ragged = keras.Input(shape=(None, 2, 1), dtype='float32', ragged=True)
     y_ragged = keras.layers.TimeDistributed(layer)(x_ragged)
     model_1 = keras.models.Model(x_ragged, y_ragged)
+    model_1._experimental_run_tf_function = (
+        testing_utils.should_run_tf_function())
+    model_1._run_eagerly = testing_utils.should_run_eagerly()
     output_ragged = model_1.predict(ragged_data, steps=1)
 
     x_dense = keras.Input(shape=(None, 2, 1), dtype='float32')
@@ -394,6 +399,9 @@ class TimeDistributedTest(keras_parameterized.TestCase):
     y_dense = keras.layers.TimeDistributed(layer)(masking)
     model_2 = keras.models.Model(x_dense, y_dense)
     dense_data = ragged_data.to_tensor()
+    model_2._experimental_run_tf_function = (
+        testing_utils.should_run_tf_function())
+    model_2._run_eagerly = testing_utils.should_run_eagerly()
     output_dense = model_2.predict(dense_data, steps=1)
 
     output_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
@@ -315,7 +315,7 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
                                    name='d1'))
     ref_model.add(keras.layers.Dense(num_classes, name='d2'))
     ref_model.compile(loss=keras.losses.MSE,
-                      optimizer=keras.optimizers.RMSprop(lr=0.0001),
+                      optimizer='rmsprop',
                       metrics=[keras.metrics.categorical_accuracy])
 
     f_ref_model = h5py.File(h5_path, 'w')
@@ -327,7 +327,7 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
                                  input_dim=input_dim, name='d1'))
     model.add(keras.layers.Dense(num_classes, name='d2'))
     model.compile(loss=keras.losses.MSE,
-                  optimizer=keras.optimizers.RMSprop(lr=0.0001),
+                  optimizer='rmsprop',
                   metrics=[keras.metrics.categorical_accuracy])
     with self.assertRaisesRegexp(ValueError,
                                  r'Layer #0 \(named \"d1\"\) expects 1 '
@@ -103,7 +103,7 @@ class TestSaveModel(test.TestCase):
 
     model.compile(
         loss=keras.losses.MSE,
-        optimizer=keras.optimizers.RMSprop(lr=0.0001),
+        optimizer='rmsprop',
         metrics=[keras.metrics.categorical_accuracy])
 
     config = model.to_json()
@@ -145,7 +145,7 @@ class TestSaveModel(test.TestCase):
 
     model.compile(
         loss=keras.losses.MSE,
-        optimizer=keras.optimizers.RMSprop(lr=0.0001),
+        optimizer='rmsprop',
         metrics=[keras.metrics.categorical_accuracy])
 
     config = model.to_json()