tf.distribute.Strategy.reduce: Remove the default value for the axis argument in Strategy V2, but keep it for Strategy V1.
Change all callers to specify the argument, so their code continues to work with Strategy V2. For now, most of the callers use axis=None. If this remains the most common case, we can consider adding back the default in the future. PiperOrigin-RevId: 243462195
This commit is contained in:
parent
06d2ea93fa
commit
471993acd6
@ -293,7 +293,7 @@ class CollectiveAllReduceStrategyTestBase(
|
||||
return array_ops.identity(x)
|
||||
|
||||
x = distribution.extended.call_for_each_replica(model_fn)
|
||||
reduced_x = distribution.reduce(reduce_util.ReduceOp.MEAN, x)
|
||||
reduced_x = distribution.reduce(reduce_util.ReduceOp.MEAN, x, axis=None)
|
||||
x = distribution.experimental_local_results(x)[0]
|
||||
|
||||
sess.run(variables.global_variables_initializer())
|
||||
|
@ -438,7 +438,7 @@ class Strategy(object):
|
||||
with self.scope():
|
||||
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
|
||||
|
||||
def reduce(self, reduce_op, value, axis=None):
|
||||
def reduce(self, reduce_op, value, axis):
|
||||
"""Reduce `value` across replicas.
|
||||
|
||||
Given a per-replica value returned by `experimental_run_v2`, say a
|
||||
@ -468,8 +468,10 @@ class Strategy(object):
|
||||
be combined.
|
||||
value: A "per replica" value, e.g. returned by `experimental_run_v2` to
|
||||
be combined into a single tensor.
|
||||
axis: Optional. Specifies the dimension to reduce along within each
|
||||
replica's tensor. Should typically be set to the batch dimension.
|
||||
axis: Specifies the dimension to reduce along within each
|
||||
replica's tensor. Should typically be set to the batch dimension, or
|
||||
`None` to only reduce across replicas (e.g. if the tensor has no batch
|
||||
dimension).
|
||||
|
||||
Returns:
|
||||
A `Tensor`.
|
||||
@ -729,6 +731,11 @@ class StrategyV1(Strategy):
|
||||
return super(StrategyV1, self).experimental_run(
|
||||
fn, input_iterator)
|
||||
|
||||
def reduce(self, reduce_op, value, axis=None):
|
||||
return super(StrategyV1, self).reduce(reduce_op, value, axis)
|
||||
|
||||
reduce.__doc__ = Strategy.reduce.__doc__
|
||||
|
||||
def update_config_proto(self, config_proto):
|
||||
"""Returns a copy of `config_proto` modified for use with this strategy.
|
||||
|
||||
|
@ -321,7 +321,7 @@ class TestStrategyTest(test.TestCase):
|
||||
@_run_in_and_out_of_scope
|
||||
def testReduce(self, dist):
|
||||
x = constant_op.constant(1.)
|
||||
x_r = dist.reduce(reduce_util.ReduceOp.MEAN, x)
|
||||
x_r = dist.reduce(reduce_util.ReduceOp.MEAN, x, axis=None)
|
||||
self.assertEqual(self.evaluate(x), self.evaluate(x_r))
|
||||
|
||||
def testReductions_acceptStringOps(self):
|
||||
@ -329,7 +329,7 @@ class TestStrategyTest(test.TestCase):
|
||||
for op in ("mean", "MEAN", "sum", "SUM"):
|
||||
x = constant_op.constant(1.)
|
||||
y = constant_op.constant(1.)
|
||||
x_r = dist.reduce(op, x)
|
||||
x_r = dist.reduce(op, x, axis=None)
|
||||
self.assertEqual(self.evaluate(x), self.evaluate(x_r))
|
||||
x_r = dist.extended.reduce_to(op, x, "/CPU:0")
|
||||
self.assertEqual(self.evaluate(x), self.evaluate(x_r))
|
||||
|
@ -804,11 +804,13 @@ class MultiStepContext(object):
|
||||
self._last_step_outputs[name] = output
|
||||
else:
|
||||
distribution = distribution_strategy_context.get_strategy()
|
||||
self._last_step_outputs[name] = distribution.reduce(reduce_op, output)
|
||||
self._last_step_outputs[name] = distribution.reduce(reduce_op, output,
|
||||
axis=None)
|
||||
else:
|
||||
assert reduce_op is not None
|
||||
def merge_fn(distribution, value):
|
||||
self._last_step_outputs[name] = distribution.reduce(reduce_op, value)
|
||||
self._last_step_outputs[name] = distribution.reduce(reduce_op, value,
|
||||
axis=None)
|
||||
# Setting this inside the `merge_fn` because all replicas share the same
|
||||
# context object, so it's more robust to set it only once (even if all
|
||||
# the replicas are trying to set the same value).
|
||||
|
@ -514,7 +514,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
|
||||
if not reduced:
|
||||
self.assertLen(distribution.experimental_local_results(loss_output),
|
||||
distribution.num_replicas_in_sync)
|
||||
loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output)
|
||||
loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output,
|
||||
axis=None)
|
||||
else:
|
||||
unwrapped_output = distribution.experimental_local_results(loss_output)
|
||||
self.assertLen(unwrapped_output, 1)
|
||||
|
@ -103,7 +103,7 @@ class MirroredTwoDeviceDistributionTest(
|
||||
def testReduceToCpu(self, distribution):
|
||||
with distribution.scope():
|
||||
result = distribution.extended.call_for_each_replica(_replica_id)
|
||||
reduced = distribution.reduce(reduce_util.ReduceOp.SUM, result)
|
||||
reduced = distribution.reduce(reduce_util.ReduceOp.SUM, result, axis=None)
|
||||
expected = sum(range(distribution.num_replicas_in_sync))
|
||||
self.assertEqual(expected, self.evaluate(reduced))
|
||||
|
||||
|
@ -1310,7 +1310,8 @@ class SyncOnReadVariable(DistributedVariable, PerReplica):
|
||||
if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA:
|
||||
return self.primary
|
||||
return self._distribute_strategy.reduce(
|
||||
reduce_util.ReduceOp.from_variable_aggregation(self.aggregation), self)
|
||||
reduce_util.ReduceOp.from_variable_aggregation(self.aggregation), self,
|
||||
axis=None)
|
||||
|
||||
def _as_graph_element(self):
|
||||
# pylint: disable=protected-access
|
||||
|
@ -107,7 +107,7 @@ def unwrap_values(distribution_strategy, grouped_inputs, grouped_outputs,
|
||||
if with_loss_tensor:
|
||||
# reduce loss tensor before adding it to the list of fetches
|
||||
loss = distribution_strategy.reduce(reduce_util.ReduceOp.SUM,
|
||||
grouped_outputs[0])
|
||||
grouped_outputs[0], axis=None)
|
||||
all_outputs = flatten_perdevice_values(distribution_strategy,
|
||||
grouped_outputs[1:])
|
||||
all_outputs = [loss] + all_outputs
|
||||
|
@ -497,7 +497,8 @@ def experimental_tpu_test_loop(model,
|
||||
# We reduce all other metrics using mean for now. This is temporary
|
||||
# workaround until new metrics are in place.
|
||||
reduce_op = ds_reduce_util.ReduceOp.MEAN
|
||||
output_tensors[label] = current_strategy.reduce(reduce_op, output)
|
||||
output_tensors[label] = current_strategy.reduce(reduce_op, output,
|
||||
axis=None)
|
||||
test_op = control_flow_ops.group(list(output_tensors.values()))
|
||||
|
||||
if verbose >= 1:
|
||||
|
@ -289,7 +289,7 @@ class DynamicLossScale(LossScale):
|
||||
is_finite_float = distribution.extended.call_for_each_replica(
|
||||
get_is_finite, args=(grads,))
|
||||
reduced_is_finite_float = distribution.reduce(reduce_util.ReduceOp.SUM,
|
||||
is_finite_float)
|
||||
is_finite_float, axis=None)
|
||||
is_finite = math_ops.equal(reduced_is_finite_float,
|
||||
distribution.num_replicas_in_sync)
|
||||
else:
|
||||
|
@ -53,7 +53,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
@ -53,7 +53,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
@ -52,7 +52,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
@ -53,7 +53,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
@ -53,7 +53,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
@ -53,7 +53,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
@ -53,7 +53,7 @@ tf_class {
|
||||
}
|
||||
member_method {
|
||||
name: "reduce"
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "scope"
|
||||
|
Loading…
Reference in New Issue
Block a user