Rename LossScalingGradientTape to LossScaleGradientTape.

This makes it more consistent with LossScale and LossScaleOptimizer.

Since LossScalingGradientTape is not yet in a stable release, no need to worry about breaking anyone.
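
For reference, a minimal usage sketch of the renamed class, adapted from the docstring example in this change (the `model`, `optimizer`, `dataset`, `inputs`, and `labels` names below are placeholders, not part of this change):

  import tensorflow as tf

  loss_scale = tf.train.experimental.DynamicLossScale()
  for inputs, labels in dataset:  # placeholder training loop
    with tf.mixed_precision.experimental.LossScaleGradientTape(loss_scale) as tape:
      logits = model(inputs)  # placeholder model call
      loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=labels)
    # gradient() scales the loss internally and unscales the gradients
    # before returning them.
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))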

PiperOrigin-RevId: 272067559
commit e323183bf6 (parent 0175008120)
Author: Reed Wanderman-Milne, 2019-09-30 14:35:03 -07:00; committed by TensorFlower Gardener
4 changed files with 27 additions and 27 deletions


@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Contains Loss Scaling Gradient Tape."""
+"""Contains Loss Scale Gradient Tape."""
 from __future__ import absolute_import
 from __future__ import division
@@ -26,8 +26,8 @@ from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("mixed_precision.experimental.LossScalingGradientTape", v1=[])
-class LossScalingGradientTape(backprop.GradientTape):
+@tf_export("mixed_precision.experimental.LossScaleGradientTape", v1=[])
+class LossScaleGradientTape(backprop.GradientTape):
   """A gradient tape that scales losses and unscales resulting gradients.
 
   Operates as a normal gradient tape, but takes in a
@@ -51,7 +51,7 @@ class LossScalingGradientTape(backprop.GradientTape):
   model_loss_scale = tf.train.experimental.DynamicLossScale()
 
   for step in training_steps:
-    with LossScalingGradientTape(model_loss_scale) as tape:
+    with LossScaleGradientTape(model_loss_scale) as tape:
       logits = ...  # Run model and get logits
       loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=labels)
@@ -66,7 +66,7 @@ class LossScalingGradientTape(backprop.GradientTape):
                loss_scale,
                persistent=False,
                watch_accessed_variables=True):
-    """Creates a new LossScalingGradientTape.
+    """Creates a new LossScaleGradientTape.
 
     Args:
       loss_scale: `tf.train.experimental.LossScale` object that
@@ -89,8 +89,8 @@ class LossScalingGradientTape(backprop.GradientTape):
       raise ValueError("`loss_scale` must be an instance of LossScale.")
 
     # always make a persistent tape to loop over loss scaling
-    super(LossScalingGradientTape, self).__init__(True,
-                                                  watch_accessed_variables)
+    super(LossScaleGradientTape, self).__init__(True,
+                                                watch_accessed_variables)
     self._outer_persistent = persistent
     self._loss_scale = loss_scale
@@ -142,7 +142,7 @@ class LossScalingGradientTape(backprop.GradientTape):
       loss_scale = self._loss_scale()
       scaled_target = nest.map_structure(lambda t: t * loss_scale, target)
-      old_grads = super(LossScalingGradientTape, self).gradient(
+      old_grads = super(LossScaleGradientTape, self).gradient(
           scaled_target, sources, output_gradients, unconnected_gradients)
       inv_loss_scale = 1.0 / self._loss_scale()
       grads = nest.map_structure(lambda g: inv_loss_scale * g, old_grads)


@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for lsgt.LossScalingGradientTape."""
+"""Tests for lsgt.LossScaleGradientTape."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -27,13 +27,13 @@ from tensorflow.python.training.experimental import loss_scale as loss_scale_mod
 from tensorflow.python.training.experimental import loss_scaling_gradient_tape as lsgt
 
 
-class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
+class LossScaleGradientTapeTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(loss_scale_module.FixedLossScale,
                             loss_scale_module.DynamicLossScale)
   def test_basic_tapes_eager_mode(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * x
     dy_dx = g.gradient(y, x)
@@ -47,7 +47,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
     @def_function.function
     def _inner_test():
       x = constant_op.constant(3.0)
-      with lsgt.LossScalingGradientTape(loss_scale) as g:
+      with lsgt.LossScaleGradientTape(loss_scale) as g:
         g.watch(x)
         y = x * x
       return g.gradient(y, x)
@@ -57,9 +57,9 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_nested_tapes(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
-      with lsgt.LossScalingGradientTape(loss_scale(32)) as gg:
+      with lsgt.LossScaleGradientTape(loss_scale(32)) as gg:
         gg.watch(x)
         y = x * x
       dy_dx = gg.gradient(y, x)
@@ -71,7 +71,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_non_persistent_tapes_error(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32), persistent=False) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32), persistent=False) as g:
       g.watch(x)
       y = x * x
       z = y * y
@@ -83,7 +83,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_persistent_tapes(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32), persistent=True) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32), persistent=True) as g:
       g.watch(x)
       y = x * x
       z = y * y
@@ -97,7 +97,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_nested_sources(self, loss_scale):
     x = (constant_op.constant(19.0), (constant_op.constant(8.),
                                       constant_op.constant(9.)))
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * 13
     dy_dx = g.gradient(y, x)
@@ -107,7 +107,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_nested_targets(self, loss_scale):
     w = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(w)
       x = w * 5
       y = w * 7
@@ -119,7 +119,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_scaling_inf_gradient(self, loss_scale):
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * np.inf
     dy_dx = g.gradient(y, x)
@@ -129,7 +129,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_scaling_nan_gradient(self, loss_scale):
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * np.nan
     dy_dx = g.gradient(y, x)
@@ -139,7 +139,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_dynamic_scale_to_one_on_non_finite_gradient(self, non_finite_term):
     loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32)
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * non_finite_term
     g.gradient(y, x)
@@ -150,7 +150,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                                              is_non_finite):
     loss_scale = loss_scale_module.FixedLossScale(32)
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * non_finite_term
     dy_dx = g.gradient(y, x)
@@ -160,7 +160,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_dynamic_loss_scaling_down_loop(self):
     loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32)
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * (3.0 * (10**37))  # grad will be inf after scaling
     dy_dx = g.gradient(y, x)
@@ -170,7 +170,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_dynamic_loss_scaling_inf_target_post_scale(self):
     loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32.0)
     x = constant_op.constant(3.0 * (10**37))
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * 3.0  # target will be inf after scaling
     dy_dx = g.gradient(y, x)


@@ -1,6 +1,6 @@
-path: "tensorflow.mixed_precision.experimental.LossScalingGradientTape"
+path: "tensorflow.mixed_precision.experimental.LossScaleGradientTape"
 tf_class {
-  is_instance: "<class \'tensorflow.python.training.experimental.loss_scaling_gradient_tape.LossScalingGradientTape\'>"
+  is_instance: "<class \'tensorflow.python.training.experimental.loss_scaling_gradient_tape.LossScaleGradientTape\'>"
   is_instance: "<class \'tensorflow.python.eager.backprop.GradientTape\'>"
   is_instance: "<type \'object\'>"
   member_method {


@@ -1,7 +1,7 @@
 path: "tensorflow.mixed_precision.experimental"
 tf_module {
   member {
-    name: "LossScalingGradientTape"
+    name: "LossScaleGradientTape"
     mtype: "<type \'type\'>"
   }
 }