tf.distribute.Strategy.reduce: Remove the default value for the axis argument in Strategy V2, but keep it for Strategy V1.

Change all callers to specify the argument so that their code continues to work with Strategy V2.
For now, most callers use axis=None; if that remains the most common case, we can consider adding the default back in the future.
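For illustration, here is a minimal sketch of the caller-side change (the MirroredStrategy setup and step function are hypothetical, not part of this commit):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # hypothetical setup

def step_fn():
  # Each replica computes its own scalar loss.
  return tf.constant(1.0)

per_replica_loss = strategy.experimental_run_v2(step_fn)

# Before: strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss)
# relied on the default axis=None. With Strategy V2, axis must now be passed
# explicitly; most callers simply write axis=None.
loss = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss, axis=None)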

PiperOrigin-RevId: 243462195
Author: Priya Gupta, 2019-04-13 19:58:00 -07:00 (committed by TensorFlower Gardener)
Parent: 06d2ea93fa
Commit: 471993acd6
17 changed files with 33 additions and 21 deletions

@@ -293,7 +293,7 @@ class CollectiveAllReduceStrategyTestBase(
return array_ops.identity(x)
x = distribution.extended.call_for_each_replica(model_fn)
-reduced_x = distribution.reduce(reduce_util.ReduceOp.MEAN, x)
+reduced_x = distribution.reduce(reduce_util.ReduceOp.MEAN, x, axis=None)
x = distribution.experimental_local_results(x)[0]
sess.run(variables.global_variables_initializer())

@@ -438,7 +438,7 @@ class Strategy(object):
with self.scope():
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
-def reduce(self, reduce_op, value, axis=None):
+def reduce(self, reduce_op, value, axis):
"""Reduce `value` across replicas.
Given a per-replica value returned by `experimental_run_v2`, say a
@@ -468,8 +468,10 @@ class Strategy(object):
be combined.
value: A "per replica" value, e.g. returned by `experimental_run_v2` to
be combined into a single tensor.
-axis: Optional. Specifies the dimension to reduce along within each
-replica's tensor. Should typically be set to the batch dimension.
+axis: Specifies the dimension to reduce along within each
+replica's tensor. Should typically be set to the batch dimension, or
+`None` to only reduce across replicas (e.g. if the tensor has no batch
+dimension).
Returns:
A `Tensor`.
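To make the new axis semantics concrete, here is a small sketch (the strategy choice and values are hypothetical; only the `reduce` call itself reflects this commit). `axis=None` combines values across replicas only, while an integer axis additionally reduces along that dimension of each replica's tensor, typically the batch dimension:

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # hypothetical setup

def step_fn():
  # A per-replica batch of two examples; the batch dimension is axis 0.
  return tf.constant([1.0, 3.0])

per_replica_batch = strategy.experimental_run_v2(step_fn)

# axis=None: only reduce across replicas; the batch dimension is preserved.
per_example_mean = strategy.reduce(
    tf.distribute.ReduceOp.MEAN, per_replica_batch, axis=None)

# axis=0: also average over the batch dimension, yielding a single scalar.
overall_mean = strategy.reduce(
    tf.distribute.ReduceOp.MEAN, per_replica_batch, axis=0)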
@@ -729,6 +731,11 @@ class StrategyV1(Strategy):
return super(StrategyV1, self).experimental_run(
fn, input_iterator)
+def reduce(self, reduce_op, value, axis=None):
+return super(StrategyV1, self).reduce(reduce_op, value, axis)
+reduce.__doc__ = Strategy.reduce.__doc__
def update_config_proto(self, config_proto):
"""Returns a copy of `config_proto` modified for use with this strategy.

@@ -321,7 +321,7 @@ class TestStrategyTest(test.TestCase):
@_run_in_and_out_of_scope
def testReduce(self, dist):
x = constant_op.constant(1.)
-x_r = dist.reduce(reduce_util.ReduceOp.MEAN, x)
+x_r = dist.reduce(reduce_util.ReduceOp.MEAN, x, axis=None)
self.assertEqual(self.evaluate(x), self.evaluate(x_r))
def testReductions_acceptStringOps(self):
@@ -329,7 +329,7 @@ class TestStrategyTest(test.TestCase):
for op in ("mean", "MEAN", "sum", "SUM"):
x = constant_op.constant(1.)
y = constant_op.constant(1.)
-x_r = dist.reduce(op, x)
+x_r = dist.reduce(op, x, axis=None)
self.assertEqual(self.evaluate(x), self.evaluate(x_r))
x_r = dist.extended.reduce_to(op, x, "/CPU:0")
self.assertEqual(self.evaluate(x), self.evaluate(x_r))

@@ -804,11 +804,13 @@ class MultiStepContext(object):
self._last_step_outputs[name] = output
else:
distribution = distribution_strategy_context.get_strategy()
-self._last_step_outputs[name] = distribution.reduce(reduce_op, output)
+self._last_step_outputs[name] = distribution.reduce(reduce_op, output,
+axis=None)
else:
assert reduce_op is not None
def merge_fn(distribution, value):
-self._last_step_outputs[name] = distribution.reduce(reduce_op, value)
+self._last_step_outputs[name] = distribution.reduce(reduce_op, value,
+axis=None)
# Setting this inside the `merge_fn` because all replicas share the same
# context object, so it's more robust to set it only once (even if all
# the replicas are trying to set the same value).

@@ -514,7 +514,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
if not reduced:
self.assertLen(distribution.experimental_local_results(loss_output),
distribution.num_replicas_in_sync)
-loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output)
+loss_tensor = distribution.reduce(reduce_util.ReduceOp.MEAN, loss_output,
+axis=None)
else:
unwrapped_output = distribution.experimental_local_results(loss_output)
self.assertLen(unwrapped_output, 1)

@@ -103,7 +103,7 @@ class MirroredTwoDeviceDistributionTest(
def testReduceToCpu(self, distribution):
with distribution.scope():
result = distribution.extended.call_for_each_replica(_replica_id)
-reduced = distribution.reduce(reduce_util.ReduceOp.SUM, result)
+reduced = distribution.reduce(reduce_util.ReduceOp.SUM, result, axis=None)
expected = sum(range(distribution.num_replicas_in_sync))
self.assertEqual(expected, self.evaluate(reduced))

@@ -1310,7 +1310,8 @@ class SyncOnReadVariable(DistributedVariable, PerReplica):
if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA:
return self.primary
return self._distribute_strategy.reduce(
-reduce_util.ReduceOp.from_variable_aggregation(self.aggregation), self)
+reduce_util.ReduceOp.from_variable_aggregation(self.aggregation), self,
+axis=None)
def _as_graph_element(self):
# pylint: disable=protected-access

@@ -107,7 +107,7 @@ def unwrap_values(distribution_strategy, grouped_inputs, grouped_outputs,
if with_loss_tensor:
# reduce loss tensor before adding it to the list of fetches
loss = distribution_strategy.reduce(reduce_util.ReduceOp.SUM,
-grouped_outputs[0])
+grouped_outputs[0], axis=None)
all_outputs = flatten_perdevice_values(distribution_strategy,
grouped_outputs[1:])
all_outputs = [loss] + all_outputs

@@ -497,7 +497,8 @@ def experimental_tpu_test_loop(model,
# We reduce all other metrics using mean for now. This is temporary
# workaround until new metrics are in place.
reduce_op = ds_reduce_util.ReduceOp.MEAN
-output_tensors[label] = current_strategy.reduce(reduce_op, output)
+output_tensors[label] = current_strategy.reduce(reduce_op, output,
+axis=None)
test_op = control_flow_ops.group(list(output_tensors.values()))
if verbose >= 1:

@@ -289,7 +289,7 @@ class DynamicLossScale(LossScale):
is_finite_float = distribution.extended.call_for_each_replica(
get_is_finite, args=(grads,))
reduced_is_finite_float = distribution.reduce(reduce_util.ReduceOp.SUM,
-is_finite_float)
+is_finite_float, axis=None)
is_finite = math_ops.equal(reduced_is_finite_float,
distribution.num_replicas_in_sync)
else:
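The loss-scaling hunk above uses a pattern worth spelling out: each replica reports a 0/1 "all gradients finite" flag, the flags are summed across replicas with axis=None (each flag is already a scalar), and the sum is compared with the replica count. A hedged sketch of the same pattern against the public API (the names and inputs here are illustrative, not the library's internals):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # hypothetical setup

def get_is_finite(grad):
  # 1.0 if every element of this replica's gradient is finite, else 0.0.
  return tf.cast(tf.reduce_all(tf.math.is_finite(grad)), tf.float32)

per_replica_flag = strategy.experimental_run_v2(
    get_is_finite, args=(tf.constant([1.0, 2.0]),))

# Sum the per-replica flags; axis=None since each flag is a scalar.
num_finite = strategy.reduce(
    tf.distribute.ReduceOp.SUM, per_replica_flag, axis=None)

# All replicas saw finite gradients iff the sum equals the replica count.
all_finite = tf.equal(num_finite, strategy.num_replicas_in_sync)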

@@ -53,7 +53,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"
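This golden-file update (and the identical ones below) simply records that axis no longer has a default value in the exported reduce signature: defaults goes from ['None'] to None. As a rough illustration, not the mechanism the golden files are generated with, the same change is visible from plain inspect:

import inspect
import tensorflow as tf

spec = inspect.getfullargspec(tf.distribute.Strategy.reduce)
print(spec.args)      # ['self', 'reduce_op', 'value', 'axis']
print(spec.defaults)  # None after this change; previously (None,)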

@@ -53,7 +53,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"

@@ -52,7 +52,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"

@@ -53,7 +53,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"

@@ -53,7 +53,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"

@@ -53,7 +53,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"

@@ -53,7 +53,7 @@ tf_class {
}
member_method {
name: "reduce"
-argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=[\'None\'], "
+argspec: "args=[\'self\', \'reduce_op\', \'value\', \'axis\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "scope"