Rename all_reduce_sum_gradients to experimental_aggregate_gradients
For some strategies no all-reduce is performed, so the name all_reduce_sum_gradients can be misleading. The parameter is also marked experimental because of issues with CentralStorageStrategy.

PiperOrigin-RevId: 302734837
Change-Id: Ic30e2f81ab61eef568ee68e5752015f950117d47
commit 75ae7742ab (parent 448a04e7c4)

Changed paths:
  tensorflow/python/distribute
  tensorflow/python/keras
  tensorflow/tools/api/golden

Golden API files updated under tools/api/golden:
  v1:
    tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt
    tensorflow.keras.optimizers.-adadelta.pbtxt
    tensorflow.keras.optimizers.-adagrad.pbtxt
    tensorflow.keras.optimizers.-adam.pbtxt
    tensorflow.keras.optimizers.-adamax.pbtxt
    tensorflow.keras.optimizers.-ftrl.pbtxt
    tensorflow.keras.optimizers.-nadam.pbtxt
    tensorflow.keras.optimizers.-optimizer.pbtxt
    tensorflow.keras.optimizers.-r-m-sprop.pbtxt
    tensorflow.keras.optimizers.-s-g-d.pbtxt
  v2: the same ten tensorflow.keras.* files as v1, plus:
    tensorflow.optimizers.-adadelta.pbtxt
    tensorflow.optimizers.-adagrad.pbtxt
    tensorflow.optimizers.-adam.pbtxt
    tensorflow.optimizers.-adamax.pbtxt
    tensorflow.optimizers.-ftrl.pbtxt
    tensorflow.optimizers.-nadam.pbtxt
    tensorflow.optimizers.-optimizer.pbtxt
    tensorflow.optimizers.-r-m-sprop.pbtxt
    tensorflow.optimizers.-s-g-d.pbtxt
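At call sites the change is purely a keyword rename. A minimal hedged sketch of the new spelling outside any distribution strategy (the constants mirror the unit tests further down; this is an illustration, not code from the commit):

```python
import tensorflow as tf

v = tf.Variable([1.0, 2.0])
opt = tf.keras.optimizers.SGD(learning_rate=3.0)

# Gradients that have already been aggregated (or should be applied as-is).
grads = [tf.constant([0.1, 0.1])]

# Renamed keyword: False tells the optimizer not to sum gradients across
# replicas itself and to apply them exactly as given.
opt.apply_gradients(zip(grads, [v]),
                    experimental_aggregate_gradients=False)

print(v.numpy())  # [0.7 1.7], the plain SGD update v -= lr * g
```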
@@ -41,7 +41,8 @@ class Hints(object):
         bytes_per_pack=50 * 1024 * 1024)
     grads = tf.distribute.get_replica_context().all_reduce(
         'sum', grads, experimental_hints=hints)
-    optimizer.apply_gradients(zip(grads, vars), all_reduce_sum_gradients=False)
+    optimizer.apply_gradients(zip(grads, vars),
+                              experimental_aggregate_gradients=False)
     ```

   """
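The docstring example above is abbreviated. A fuller hedged sketch of the same pattern follows; it assumes the hints class is exported as tf.distribute.experimental.CollectiveHints at this revision and that the per-replica entry point is Strategy.run (experimental_run_v2 on older builds) — neither detail comes from this diff.

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
  dense = tf.keras.layers.Dense(4)
  dense.build([None, 3])  # create variables under the strategy scope
  optimizer = tf.keras.optimizers.SGD(0.1)

@tf.function
def train_step(x, y):
  def step_fn(x, y):
    with tf.GradientTape() as tape:
      loss = tf.reduce_mean((dense(x) - y) ** 2)
    grads = tape.gradient(loss, dense.trainable_variables)

    # Aggregate manually, packing small tensors together (as in the docstring).
    hints = tf.distribute.experimental.CollectiveHints(  # assumed export path
        bytes_per_pack=50 * 1024 * 1024)
    grads = tf.distribute.get_replica_context().all_reduce(
        'sum', grads, experimental_hints=hints)

    # The gradients are already summed across replicas, so opt out of the
    # optimizer's own aggregation with the renamed keyword.
    optimizer.apply_gradients(
        zip(grads, dense.trainable_variables),
        experimental_aggregate_gradients=False)

  strategy.run(step_fn, args=(x, y))  # experimental_run_v2 on older builds

train_step(tf.ones([8, 3]), tf.zeros([8, 4]))
```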
@@ -40,14 +40,14 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
           ),
           combinations.concat(
               combinations.combine(
-                  all_reduce_sum_gradients=True,
+                  experimental_aggregate_gradients=True,
                   expected=[[[-0.3, -0.3], [-0.3, -0.3]]]),
               combinations.combine(
-                  all_reduce_sum_gradients=False,
+                  experimental_aggregate_gradients=False,
                   expected=[[[-0.1, -0.1], [-0.2, -0.2]]]),
           )))
-  def test_custom_aggregation(self, distribution, all_reduce_sum_gradients,
-                              expected):
+  def test_custom_aggregation(self, distribution,
+                              experimental_aggregate_gradients, expected):

     with distribution.scope():
       v = variables.Variable([0., 0.])
@@ -62,7 +62,8 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):

     def step_fn(grads):
       optimizer.apply_gradients(
-          [(grads, v)], all_reduce_sum_gradients=all_reduce_sum_gradients)
+          [(grads, v)],
+          experimental_aggregate_gradients=experimental_aggregate_gradients)
       return v.read_value()

     return distribution.experimental_local_results(
@@ -74,9 +75,9 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
       combinations.combine(
           distribution=strategy_combinations.one_device_strategy,
           mode=["eager"],
-          all_reduce_sum_gradients=[True, False]))
+          experimental_aggregate_gradients=[True, False]))
   def test_custom_aggregation_one_device(self, distribution,
-                                         all_reduce_sum_gradients):
+                                         experimental_aggregate_gradients):

     with distribution.scope():
       v = variables.Variable([0., 0.])
@@ -88,7 +89,8 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):

     def step_fn(grads):
       optimizer.apply_gradients(
-          [(grads, v)], all_reduce_sum_gradients=all_reduce_sum_gradients)
+          [(grads, v)],
+          experimental_aggregate_gradients=experimental_aggregate_gradients)
       return v.read_value()

     return distribution.experimental_local_results(
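The expected values in the first hunk above follow directly from the aggregation semantics. Assuming per-replica gradients of 0.1 and 0.2 and a learning rate of 1.0 (those constants live outside the shown hunks, so this is a reconstruction), the arithmetic is:

```python
lr = 1.0
per_replica_grads = [0.1, 0.2]

# experimental_aggregate_gradients=True: every replica applies the summed gradient.
print(-lr * sum(per_replica_grads))          # -0.3 on both replicas

# experimental_aggregate_gradients=False: each replica applies only its own gradient.
print([-lr * g for g in per_replica_grads])  # [-0.1, -0.2]
```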
@@ -1794,7 +1794,7 @@ def _minimize(tape, optimizer, loss, trainable_variables):

   gradients = tape.gradient(loss, trainable_variables)

-  if optimizer._HAS_ALL_REDUCE_SUM_GRAD: # pylint: disable=protected-access
+  if optimizer._HAS_AGGREGATE_GRAD: # pylint: disable=protected-access
     # We aggregate gradients before unscaling them, in case a subclass of
     # LossScaleOptimizer all-reduces in fp16. All-reducing in fp16 can only be
     # done on scaled gradients, not unscaled gradients, for numeric stability.
@@ -1804,9 +1804,10 @@ def _minimize(tape, optimizer, loss, trainable_variables):
     gradients = optimizer.get_unscaled_gradients(gradients)
   gradients = optimizer._clip_gradients(gradients) # pylint: disable=protected-access
   if trainable_variables:
-    if optimizer._HAS_ALL_REDUCE_SUM_GRAD: # pylint: disable=protected-access
-      optimizer.apply_gradients(zip(gradients, trainable_variables),
-                                all_reduce_sum_gradients=False)
+    if optimizer._HAS_AGGREGATE_GRAD: # pylint: disable=protected-access
+      optimizer.apply_gradients(
+          zip(gradients, trainable_variables),
+          experimental_aggregate_gradients=False)
     else:
       optimizer.apply_gradients(zip(gradients, trainable_variables))
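A hedged sketch of the dispatch this hunk implements: Keras aggregates gradients itself when the optimizer advertises the capability flag, then asks the optimizer not to aggregate a second time. The helper name below is illustrative; the real logic lives inline in _minimize.

```python
def _apply_after_manual_aggregation(optimizer, gradients, trainable_variables):
  """Illustrative helper mirroring the branch in Keras's _minimize."""
  if not trainable_variables:
    return
  if getattr(optimizer, "_HAS_AGGREGATE_GRAD", False):
    # Gradients were already all-reduced/aggregated above, so tell the
    # optimizer to apply them exactly as given.
    optimizer.apply_gradients(
        zip(gradients, trainable_variables),
        experimental_aggregate_gradients=False)
  else:
    # Optimizers that predate the keyword get the plain call and perform
    # (or skip) aggregation themselves.
    optimizer.apply_gradients(zip(gradients, trainable_variables))
```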
@@ -1342,7 +1342,7 @@ class TrainingTest(keras_parameterized.TestCase):
     class _Optimizer(optimizer_v2.gradient_descent.SGD):
       """Mock optimizer to check if _aggregate_gradient is called."""

-      _HAS_ALL_REDUCE_SUM_GRAD = True
+      _HAS_AGGREGATE_GRAD = True

       def __init__(self):
         self.aggregate_gradients_called = False
@@ -1367,10 +1367,10 @@ class TrainingTest(keras_parameterized.TestCase):
       """Override apply_gradients.

       To test the case where the optimizer does not define the
-      all_reduce_sum_gradients parameter.
+      experimental_aggregate_gradients parameter.
       """

-      _HAS_ALL_REDUCE_SUM_GRAD = False
+      _HAS_AGGREGATE_GRAD = False

       def apply_gradients(self, grads_and_vars, name=None): # pylint: disable=useless-super-delegation
         return super(_OptimizerOverrideApplyGradients,
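The mock above illustrates the compatibility contract: an optimizer that overrides apply_gradients without the new keyword should leave _HAS_AGGREGATE_GRAD as False so Keras never passes it. A hedged, self-contained sketch of such a subclass (the class name is hypothetical):

```python
import tensorflow as tf

class LegacyApplyGradientsSGD(tf.keras.optimizers.SGD):
  """Custom optimizer whose apply_gradients override predates the keyword."""

  # Keras only passes experimental_aggregate_gradients when this is True.
  _HAS_AGGREGATE_GRAD = False

  def apply_gradients(self, grads_and_vars, name=None):
    # Custom bookkeeping would go here.
    return super(LegacyApplyGradientsSGD, self).apply_gradients(
        grads_and_vars, name)

opt = LegacyApplyGradientsSGD(0.1)
v = tf.Variable([1.0])
opt.apply_gradients([(tf.constant([0.5]), v)])
print(v.numpy())  # [0.95]
```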
@@ -225,17 +225,19 @@ class LossScaleOptimizer(optimizer_v2.OptimizerV2):
     grads = self._optimizer.get_gradients(loss, params)
     return self.get_unscaled_gradients(grads)

-  def apply_gradients(self, grads_and_vars, name=None,
-                      all_reduce_sum_gradients=True):
+  def apply_gradients(self,
+                      grads_and_vars,
+                      name=None,
+                      experimental_aggregate_gradients=True):
     if distribution_strategy_context.in_cross_replica_context():
       raise ValueError('apply_gradients() must be called in a replica context.')
     grads_and_vars = tuple(grads_and_vars)
     return distribution_strategy_context.get_replica_context().merge_call(
         self._apply_gradients_cross_replica,
-        args=(grads_and_vars, name, all_reduce_sum_gradients))
+        args=(grads_and_vars, name, experimental_aggregate_gradients))

   def _apply_gradients_cross_replica(self, distribution, grads_and_vars, name,
-                                     all_reduce_sum_gradients):
+                                     experimental_aggregate_gradients):
     grads = [g for g, _ in grads_and_vars]
     loss_scale_update_op, should_apply_grads = self._loss_scale.update(grads)

@@ -247,8 +249,8 @@ class LossScaleOptimizer(optimizer_v2.OptimizerV2):
       # MirroredVariables.
       wrapped_vars = _UnwrapPreventer([v for _, v in grads_and_vars])
       return distribution.extended.call_for_each_replica(
-          self._apply_gradients, args=(grads, wrapped_vars, name,
-                                       all_reduce_sum_gradients))
+          self._apply_gradients,
+          args=(grads, wrapped_vars, name, experimental_aggregate_gradients))

     # Note: We must call this cond() in a cross-replica context.
     # DistributionStrategy does not support having a cond in a replica context
@@ -260,9 +262,10 @@ class LossScaleOptimizer(optimizer_v2.OptimizerV2):
     return control_flow_ops.group(maybe_apply_op, loss_scale_update_op)

   def _apply_gradients(self, grads, wrapped_vars, name,
-                       all_reduce_sum_gradients):
-    return self._optimizer.apply_gradients(list(zip(grads, wrapped_vars.value)),
-                                           name, all_reduce_sum_gradients)
+                       experimental_aggregate_gradients):
+    return self._optimizer.apply_gradients(
+        list(zip(grads, wrapped_vars.value)), name,
+        experimental_aggregate_gradients)

   def get_config(self):
     serialized_optimizer = optimizers.serialize(self._optimizer)
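A hedged usage sketch showing that the wrapper now forwards the renamed keyword to the wrapped optimizer. The mixed_precision.experimental path matches this revision (later releases moved LossScaleOptimizer out of experimental), and the assumption that top-level eager code counts as the default replica context is mine, not the diff's.

```python
import tensorflow as tf

inner = tf.keras.optimizers.SGD(1.0)
opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
    inner, loss_scale='dynamic')

v = tf.Variable([1.0])
grads = [tf.constant([0.25])]  # treated as already unscaled and aggregated

# apply_gradients must be called in a replica context; outside a strategy the
# default replica context applies. The keyword is forwarded to `inner`.
opt.apply_gradients(zip(grads, [v]),
                    experimental_aggregate_gradients=False)
print(v.numpy())  # [0.75]
```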
@@ -375,13 +375,15 @@ class LossScaleOptimizerTest(test.TestCase, parameterized.TestCase):

     class MyOptimizer(gradient_descent.SGD):

-      def apply_gradients(self, grads_and_vars, name=None,
-                          all_reduce_sum_gradients=True):
+      def apply_gradients(self,
+                          grads_and_vars,
+                          name=None,
+                          experimental_aggregate_gradients=True):
         for grad, _ in grads_and_vars:
           outer_self.assertIsInstance(grad, ops.Tensor)
         return super(MyOptimizer,
                      self).apply_gradients(grads_and_vars, name,
-                                           all_reduce_sum_gradients)
+                                           experimental_aggregate_gradients)

     with create_mirrored_strategy().scope() as strategy:
       var = variables.Variable([5.0])
@@ -71,7 +71,7 @@ class Adadelta(optimizer_v2.OptimizerV2):
   ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
@@ -61,7 +61,7 @@ class Adagrad(optimizer_v2.OptimizerV2):
   (https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
@@ -103,7 +103,7 @@ class Adam(optimizer_v2.OptimizerV2):
   9.9
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
@@ -354,7 +354,7 @@ class NonFusedAdam(optimizer_v2.OptimizerV2):
   9.9
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
@@ -73,7 +73,7 @@ class Adamax(optimizer_v2.OptimizerV2):
   ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
@@ -74,7 +74,7 @@ class SGD(optimizer_v2.OptimizerV2):
   http://jmlr.org/proceedings/papers/v28/sutskever13.pdf).
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.01,
@@ -61,7 +61,7 @@ class Nadam(optimizer_v2.OptimizerV2):
   See [Dozat, T., 2015](http://cs229.stanford.edu/proj2015/054_report.pdf).
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
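All of the built-in optimizer_v2 subclasses above now opt in via the renamed flag. A quick check, assuming a TF build that already contains this change (the protected attribute is read here purely for illustration):

```python
import inspect
import tensorflow as tf

opt = tf.keras.optimizers.Adam()

# Built-in optimizers advertise support for Keras-side aggregation...
print(opt._HAS_AGGREGATE_GRAD)  # True

# ...and apply_gradients exposes the renamed keyword.
print('experimental_aggregate_gradients'
      in inspect.signature(opt.apply_gradients).parameters)  # True
```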
@@ -160,8 +160,8 @@ class OptimizerV2(trackable.Trackable):
   `tf.keras.losses.Reduction.SUM` for not.

   To aggregate gradients yourself, call `apply_gradients` with
-  `all_reduce_sum_gradients` set to False. This is useful if you need to process
-  aggregated gradients.
+  `experimental_aggregate_gradients` set to False. This is useful if you need to
+  process aggregated gradients.

   If you are not using these and you want to average gradients, you should use
   `tf.math.reduce_sum` to add up your per-example losses and then divide by the
@@ -230,13 +230,13 @@ class OptimizerV2(trackable.Trackable):
   """

   # Subclasses should set this to True unless they override `apply_gradients`
-  # with a version that does not have the `all_reduce_sum_gradients` argument.
-  # Older versions of Keras did not have this argument so custom optimizers may
-  # have overridden `apply_gradients` without the `all_reduce_sum_gradients`
-  # argument. Keras only passes `all_reduce_sum_gradients` if this attribute is
-  # True.
+  # with a version that does not have the `experimental_aggregate_gradients`
+  # argument. Older versions of Keras did not have this argument so custom
+  # optimizers may have overridden `apply_gradients` without the
+  # `experimental_aggregate_gradients` argument. Keras only passes
+  # `experimental_aggregate_gradients` if this attribute is True.
   # Note: This attribute will likely be removed in an upcoming release.
-  _HAS_ALL_REDUCE_SUM_GRAD = False
+  _HAS_AGGREGATE_GRAD = False

   def __init__(self, name, **kwargs):
     """Create a new Optimizer.
@@ -433,7 +433,7 @@ class OptimizerV2(trackable.Trackable):
   def apply_gradients(self,
                       grads_and_vars,
                       name=None,
-                      all_reduce_sum_gradients=True):
+                      experimental_aggregate_gradients=True):
     """Apply gradients to variables.

     This is the second part of `minimize()`. It returns an `Operation` that
@@ -441,7 +441,7 @@ class OptimizerV2(trackable.Trackable):

     The method sums gradients from all replicas in the presence of
     `tf.distribute.Strategy` by default. You can aggregate gradients yourself by
-    passing `all_reduce_sum_gradients=False`.
+    passing `experimental_aggregate_gradients=False`.

     Example:

@@ -449,7 +449,8 @@ class OptimizerV2(trackable.Trackable):
     grads = tape.gradient(loss, vars)
     grads = tf.distribute.get_replica_context().all_reduce('sum', grads)
     # Processing aggregated gradients.
-    optimizer.apply_gradients(zip(grads, vars), all_reduce_sum_gradients=False)
+    optimizer.apply_gradients(zip(grads, vars),
+                              experimental_aggregate_gradients=False)

     ```

@@ -457,7 +458,7 @@ class OptimizerV2(trackable.Trackable):
       grads_and_vars: List of (gradient, variable) pairs.
       name: Optional name for the returned operation. Default to the name passed
         to the `Optimizer` constructor.
-      all_reduce_sum_gradients: Whether to sum gradients from different
+      experimental_aggregate_gradients: Whether to sum gradients from different
         replicas in the presense of `tf.distribute.Strategy`. If False, it's
         user responsibility to aggregate the gradients. Default to True.

@@ -491,7 +492,7 @@ class OptimizerV2(trackable.Trackable):
             "context.")

     apply_state = self._prepare(var_list)
-    if all_reduce_sum_gradients:
+    if experimental_aggregate_gradients:
       reduced_grads = self._aggregate_gradients(grads_and_vars)
       var_list = [v for _, v in grads_and_vars]
       grads_and_vars = list(zip(reduced_grads, var_list))
@@ -627,7 +627,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):

   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testAggregationTrue(self):
-    # Test that all_reduce_sum_gradients=True works without distributed
+    # Test that experimental_aggregate_gradients=True works without distributed
     # strategy.
     var = resource_variable_ops.ResourceVariable([1., 2.])
     opt = gradient_descent.SGD(3.0)
@@ -635,14 +635,14 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
     self.evaluate(variables.global_variables_initializer())
     self.assertAllClose([1., 2.], self.evaluate(var))
     opt_op = opt.apply_gradients([([0.1, 0.1], var)],
-                                 all_reduce_sum_gradients=True)
+                                 experimental_aggregate_gradients=True)
     self.evaluate(variables.global_variables_initializer())
     self.evaluate(opt_op)
     self.assertAllClose([0.7, 1.7], self.evaluate(var))

   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testAggregationFalse(self):
-    # Test that all_reduce_sum_gradients=False works without distributed
+    # Test that experimental_aggregate_gradients=False works without distributed
     # strategy.
     var = resource_variable_ops.ResourceVariable([1., 2.])
     opt = gradient_descent.SGD(3.0)
@@ -650,7 +650,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase):
     self.evaluate(variables.global_variables_initializer())
     self.assertAllClose([1., 2.], self.evaluate(var))
     opt_op = opt.apply_gradients([([0.1, 0.1], var)],
-                                 all_reduce_sum_gradients=False)
+                                 experimental_aggregate_gradients=False)
     self.evaluate(variables.global_variables_initializer())
     self.evaluate(opt_op)
     self.assertAllClose([0.7, 1.7], self.evaluate(var))
@@ -91,7 +91,7 @@ class RMSprop(optimizer_v2.OptimizerV2):
   http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf).
   """

-  _HAS_ALL_REDUCE_SUM_GRAD = True
+  _HAS_AGGREGATE_GRAD = True

   def __init__(self,
                learning_rate=0.001,
@@ -72,8 +72,8 @@ class Optimizer(object):
     self.weights = []

   # Set this to False, indicating `apply_gradients` does not take the
-  # `all_reduce_sum_gradients` argument.
-  _HAS_ALL_REDUCE_SUM_GRAD = False
+  # `experimental_aggregate_gradients` argument.
+  _HAS_AGGREGATE_GRAD = False

   def get_updates(self, loss, params):
     raise NotImplementedError
@@ -38,7 +38,7 @@ tf_class {
   }
   member_method {
     name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'all_reduce_sum_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], "
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\', \'experimental_aggregate_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], "
   }
   member_method {
     name: "from_config"

The identical one-line argspec rename is repeated once per golden API file listed above — 10 files under tools/api/golden/v1 and 19 under tools/api/golden/v2; only the hunk offsets differ (e.g. @@ -25,7 +25,7 @@ or @@ -26,7 +26,7 @@ for the other files).