diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 1156187b971..b029bc44a6b 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -293,7 +293,7 @@ class CollectiveAllReduceStrategyTestBase(
           return array_ops.identity(x)
 
         x = distribution.extended.call_for_each_replica(model_fn)
-        reduced_x = distribution.reduce(reduce_util.ReduceOp.MEAN, x)
+        reduced_x = distribution.reduce(reduce_util.ReduceOp.MEAN, x, axis=None)
         x = distribution.experimental_local_results(x)[0]
 
         sess.run(variables.global_variables_initializer())
diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py
index 16dc12cc070..061d5bcca91 100644
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@@ -438,7 +438,7 @@ class Strategy(object):
     with self.scope():
       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
 
-  def reduce(self, reduce_op, value, axis=None):
+  def reduce(self, reduce_op, value, axis):
     """Reduce `value` across replicas.
 
     Given a per-replica value returned by `experimental_run_v2`, say a
@@ -468,8 +468,10 @@ class Strategy(object):
         be combined.
       value: A "per replica" value, e.g. returned by `experimental_run_v2` to
         be combined into a single tensor.
-      axis: Optional. Specifies the dimension to reduce along within each
-        replica's tensor. Should typically be set to the batch dimension.
+      axis: Specifies the dimension to reduce along within each
+        replica's tensor. Should typically be set to the batch dimension, or
+        `None` to only reduce across replicas (e.g. if the tensor has no batch
+        dimension).
 
     Returns:
       A `Tensor`.
@@ -729,6 +731,11 @@ class StrategyV1(Strategy):
     return super(StrategyV1, self).experimental_run(
         fn, input_iterator)
 
+  def reduce(self, reduce_op, value, axis=None):
+    return super(StrategyV1, self).reduce(reduce_op, value, axis)
+
+  reduce.__doc__ = Strategy.reduce.__doc__
+
   def update_config_proto(self, config_proto):
     """Returns a copy of `config_proto` modified for use with this strategy.
 
diff --git a/tensorflow/python/distribute/distribute_lib_test.py b/tensorflow/python/distribute/distribute_lib_test.py
index 451502da58c..5f12c634f52 100644
--- a/tensorflow/python/distribute/distribute_lib_test.py
+++ b/tensorflow/python/distribute/distribute_lib_test.py
@@ -321,7 +321,7 @@ class TestStrategyTest(test.TestCase):
   @_run_in_and_out_of_scope
   def testReduce(self, dist):
     x = constant_op.constant(1.)
-    x_r = dist.reduce(reduce_util.ReduceOp.MEAN, x)
+    x_r = dist.reduce(reduce_util.ReduceOp.MEAN, x, axis=None)
     self.assertEqual(self.evaluate(x), self.evaluate(x_r))
 
   def testReductions_acceptStringOps(self):
@@ -329,7 +329,7 @@ class TestStrategyTest(test.TestCase):
     for op in ("mean", "MEAN", "sum", "SUM"):
       x = constant_op.constant(1.)
       y = constant_op.constant(1.)
-      x_r = dist.reduce(op, x)
+      x_r = dist.reduce(op, x, axis=None)
       self.assertEqual(self.evaluate(x), self.evaluate(x_r))
       x_r = dist.extended.reduce_to(op, x, "/CPU:0")
       self.assertEqual(self.evaluate(x), self.evaluate(x_r))
diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py
index be619f72486..e90c32f6666 100644
--- a/tensorflow/python/distribute/input_lib.py
+++ b/tensorflow/python/distribute/input_lib.py
@@ -804,11 +804,13 @@ class MultiStepContext(object):
         self._last_step_outputs[name] = output
       else:
         distribution = distribution_strategy_context.get_strategy()
-        self._last_step_outputs[name] = distribution.reduce(reduce_op, output)
+        self._last_step_outputs[name] = distribution.reduce(reduce_op, output,
+                                                            axis=None)
     else:
       assert reduce_op is not None
       def merge_fn(distribution, value):
-        self._last_step_outputs[name] = distribution.reduce(reduce_op, value)
+        self._last_step_outputs[name] = distribution.reduce(reduce_op, value,
+                                                            axis=None)
         # Setting this inside the `merge_fn` because all replicas share the same
         # context object, so it's more robust to set it only once (even if all
         # the replicas are trying to set the same value).
diff --git a/tensorflow/python/distribute/minimize_loss_test.py b/tensorflow/python/distribute/minimize_loss_test.py
index ee514d6ee6d..6f6af500c53 100644
--- a/tensorflow/python/distribute/minimize_loss_test.py
+++ b/tensorflow/python/distribute/minimize_loss_test.py
@@ -514,7 +514,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
     if not reduced:
       self.assertLen(distribution.experimental_local_results(loss_output),
                      distribution.num_replicas_in_sync)
-      loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output)
+      loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output,
+                                        axis=None)
     else:
       unwrapped_output = distribution.experimental_local_results(loss_output)
       self.assertLen(unwrapped_output, 1)
diff --git a/tensorflow/python/distribute/mirrored_strategy_test.py b/tensorflow/python/distribute/mirrored_strategy_test.py
index 8686d4d253d..166dca5cada 100644
--- a/tensorflow/python/distribute/mirrored_strategy_test.py
+++ b/tensorflow/python/distribute/mirrored_strategy_test.py
@@ -103,7 +103,7 @@ class MirroredTwoDeviceDistributionTest(
   def testReduceToCpu(self, distribution):
     with distribution.scope():
       result = distribution.extended.call_for_each_replica(_replica_id)
-      reduced = distribution.reduce(reduce_util.ReduceOp.SUM, result)
+      reduced = distribution.reduce(reduce_util.ReduceOp.SUM, result, axis=None)
       expected = sum(range(distribution.num_replicas_in_sync))
       self.assertEqual(expected, self.evaluate(reduced))
 
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index 7012eef2194..4268416e2f7 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -1310,7 +1310,8 @@ class SyncOnReadVariable(DistributedVariable, PerReplica):
     if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA:
       return self.primary
     return self._distribute_strategy.reduce(
-        reduce_util.ReduceOp.from_variable_aggregation(self.aggregation), self)
+        reduce_util.ReduceOp.from_variable_aggregation(self.aggregation), self,
+        axis=None)
 
   def _as_graph_element(self):
     # pylint: disable=protected-access
diff --git a/tensorflow/python/keras/distribute/distributed_training_utils.py b/tensorflow/python/keras/distribute/distributed_training_utils.py
index 43a4a10ac1b..cbd802736e3 100644
--- a/tensorflow/python/keras/distribute/distributed_training_utils.py
+++ b/tensorflow/python/keras/distribute/distributed_training_utils.py
@@ -107,7 +107,7 @@ def unwrap_values(distribution_strategy, grouped_inputs, grouped_outputs,
   if with_loss_tensor:
     # reduce loss tensor before adding it to the list of fetches
     loss = distribution_strategy.reduce(reduce_util.ReduceOp.SUM,
-                                        grouped_outputs[0])
+                                        grouped_outputs[0], axis=None)
     all_outputs = flatten_perdevice_values(distribution_strategy,
                                            grouped_outputs[1:])
     all_outputs = [loss] + all_outputs
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 93ebf5c9827..b45c645420c 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -497,7 +497,8 @@ def experimental_tpu_test_loop(model,
       # We reduce all other metrics using mean for now. This is temporary
      # workaround until new metrics are in place.
       reduce_op = ds_reduce_util.ReduceOp.MEAN
-      output_tensors[label] = current_strategy.reduce(reduce_op, output)
+      output_tensors[label] = current_strategy.reduce(reduce_op, output,
+                                                      axis=None)
   test_op = control_flow_ops.group(list(output_tensors.values()))
 
   if verbose >= 1:
diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py
index d686eabd849..e72983ee491 100644
--- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py
+++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py
@@ -289,7 +289,7 @@ class DynamicLossScale(LossScale):
       is_finite_float = distribution.extended.call_for_each_replica(
           get_is_finite, args=(grads,))
       reduced_is_finite_float = distribution.reduce(reduce_util.ReduceOp.SUM,
-                                                    is_finite_float)
+                                                    is_finite_float, axis=None)
       is_finite = math_ops.equal(reduced_is_finite_float,
                                  distribution.num_replicas_in_sync)
     else:
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
index d4336e674c7..ca2d40eb7f5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
@@ -53,7 +53,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "scope"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt
index f460ace8cbc..b86a87a07ac 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt
@@ -53,7 +53,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "scope"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
index 941bc034063..52d8b0d040f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
@@ -52,7 +52,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "scope"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt
index c0e2e8b2389..e1ae51ef1b1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt
@@ -53,7 +53,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "scope"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt
index 3547b085a6f..42c35a8aa0f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt
@@ -53,7 +53,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "scope"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt
index 230b8515114..a3049fca55a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt
@@ -53,7 +53,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "scope"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt
index a2feb6191ab..31031c790af 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt
@@ -53,7 +53,7 @@ tf_class {
   }
   member_method {
     name: "reduce"
-    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "scope"
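
Note (reviewer illustration, not part of the patch): a minimal sketch of how the updated signature is used after this change, where the v2 Strategy.reduce requires an explicit axis argument. It assumes the tf.distribute API of this era (MirroredStrategy, experimental_run_v2, tf.distribute.ReduceOp); the toy step_fn and the default local device setup are illustrative only.

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # default local devices

def step_fn():
  # Per-replica value with a batch dimension of size 2.
  return tf.constant([1.0, 2.0])

per_replica = strategy.experimental_run_v2(step_fn)

# axis=None: combine across replicas only; the batch dimension is kept
# (use this when the value has no batch dimension, e.g. an already-reduced loss).
across_replicas = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica,
                                  axis=None)

# axis=0: additionally reduce over the batch dimension within each replica,
# yielding a single scalar over all replicas and all examples.
over_everything = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica,
                                  axis=0)

The v1 StrategyV1.reduce keeps axis=None as a default for backward compatibility, so existing v1 call sites continue to work unchanged.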