Rename LossScalingGradientTape to LossScaleGradientTape.
This makes it more consistent with LossScale and LossScaleOptimizer. Since LossScalingGradientTape is not yet in a stable release, there is no need to worry about breaking anyone.

PiperOrigin-RevId: 272067559
Parent: 0175008120
Commit: e323183bf6
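For reference, a minimal sketch of how the renamed class is meant to be used under the TF 2.0 experimental mixed precision API at this revision; the model, dataset, and optimizer names below are illustrative placeholders, not part of this commit:

    import tensorflow as tf

    # Assumed setup: TF 2.x with the experimental mixed precision API at this
    # revision; `model` and `dataset` are hypothetical placeholders.
    loss_scale = tf.train.experimental.DynamicLossScale()
    opt = tf.keras.optimizers.SGD(0.01)

    for features, labels in dataset:
      # Renamed class: tf.mixed_precision.experimental.LossScaleGradientTape
      # (previously LossScalingGradientTape).
      with tf.mixed_precision.experimental.LossScaleGradientTape(loss_scale) as tape:
        logits = model(features)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
      grads = tape.gradient(loss, model.trainable_variables)
      opt.apply_gradients(zip(grads, model.trainable_variables))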
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Contains Loss Scaling Gradient Tape."""
+"""Contains Loss Scale Gradient Tape."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -26,8 +26,8 @@ from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("mixed_precision.experimental.LossScalingGradientTape", v1=[])
-class LossScalingGradientTape(backprop.GradientTape):
+@tf_export("mixed_precision.experimental.LossScaleGradientTape", v1=[])
+class LossScaleGradientTape(backprop.GradientTape):
   """A gradient tape that scales losses and unscales resulting gradients.
 
   Operates as a normal gradient tape, but takes in a
@@ -51,7 +51,7 @@ class LossScalingGradientTape(backprop.GradientTape):
   model_loss_scale = tf.train.experimental.DynamicLossScale()
 
   for step in training_steps:
-    with LossScalingGradientTape(model_loss_scale) as tape:
+    with LossScaleGradientTape(model_loss_scale) as tape:
       logits = ...  # Run model and get logits
       loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=labels)
@@ -66,7 +66,7 @@ class LossScalingGradientTape(backprop.GradientTape):
                loss_scale,
                persistent=False,
                watch_accessed_variables=True):
-    """Creates a new LossScalingGradientTape.
+    """Creates a new LossScaleGradientTape.
 
     Args:
       loss_scale: `tf.train.experimental.LossScale` object that
@@ -89,8 +89,8 @@ class LossScalingGradientTape(backprop.GradientTape):
       raise ValueError("`loss_scale` must be an instance of LossScale.")
 
     # always make a persistent tape to loop over loss scaling
-    super(LossScalingGradientTape, self).__init__(True,
-                                                  watch_accessed_variables)
+    super(LossScaleGradientTape, self).__init__(True,
+                                                watch_accessed_variables)
     self._outer_persistent = persistent
     self._loss_scale = loss_scale
 
@@ -142,7 +142,7 @@ class LossScalingGradientTape(backprop.GradientTape):
     loss_scale = self._loss_scale()
     scaled_target = nest.map_structure(lambda t: t * loss_scale, target)
 
-    old_grads = super(LossScalingGradientTape, self).gradient(
+    old_grads = super(LossScaleGradientTape, self).gradient(
         scaled_target, sources, output_gradients, unconnected_gradients)
     inv_loss_scale = 1.0 / self._loss_scale()
     grads = nest.map_structure(lambda g: inv_loss_scale * g, old_grads)
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for lsgt.LossScalingGradientTape."""
+"""Tests for lsgt.LossScaleGradientTape."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -27,13 +27,13 @@ from tensorflow.python.training.experimental import loss_scale as loss_scale_mod
 from tensorflow.python.training.experimental import loss_scaling_gradient_tape as lsgt
 
 
-class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
+class LossScaleGradientTapeTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(loss_scale_module.FixedLossScale,
                             loss_scale_module.DynamicLossScale)
   def test_basic_tapes_eager_mode(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * x
     dy_dx = g.gradient(y, x)
@@ -47,7 +47,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
     @def_function.function
     def _inner_test():
       x = constant_op.constant(3.0)
-      with lsgt.LossScalingGradientTape(loss_scale) as g:
+      with lsgt.LossScaleGradientTape(loss_scale) as g:
        g.watch(x)
        y = x * x
       return g.gradient(y, x)
@@ -57,9 +57,9 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_nested_tapes(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
-      with lsgt.LossScalingGradientTape(loss_scale(32)) as gg:
+      with lsgt.LossScaleGradientTape(loss_scale(32)) as gg:
         gg.watch(x)
         y = x * x
       dy_dx = gg.gradient(y, x)
@@ -71,7 +71,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_non_persistent_tapes_error(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32), persistent=False) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32), persistent=False) as g:
       g.watch(x)
       y = x * x
       z = y * y
@@ -83,7 +83,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_persistent_tapes(self, loss_scale):
     x = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32), persistent=True) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32), persistent=True) as g:
       g.watch(x)
       y = x * x
       z = y * y
@@ -97,7 +97,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_nested_sources(self, loss_scale):
     x = (constant_op.constant(19.0), (constant_op.constant(8.),
                                       constant_op.constant(9.)))
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * 13
     dy_dx = g.gradient(y, x)
@@ -107,7 +107,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_nested_targets(self, loss_scale):
     w = constant_op.constant(3.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(w)
       x = w * 5
       y = w * 7
@@ -119,7 +119,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_scaling_inf_gradient(self, loss_scale):
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * np.inf
     dy_dx = g.gradient(y, x)
@@ -129,7 +129,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                             loss_scale_module.DynamicLossScale)
   def test_scaling_nan_gradient(self, loss_scale):
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale(32)) as g:
+    with lsgt.LossScaleGradientTape(loss_scale(32)) as g:
       g.watch(x)
       y = x * np.nan
     dy_dx = g.gradient(y, x)
@@ -139,7 +139,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_dynamic_scale_to_one_on_non_finite_gradient(self, non_finite_term):
     loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32)
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * non_finite_term
     g.gradient(y, x)
@@ -150,7 +150,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
                                                        is_non_finite):
     loss_scale = loss_scale_module.FixedLossScale(32)
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * non_finite_term
     dy_dx = g.gradient(y, x)
@@ -160,7 +160,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_dynamic_loss_scaling_down_loop(self):
     loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32)
     x = constant_op.constant(1.0)
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * (3.0 * (10**37))  # grad will be inf after scaling
     dy_dx = g.gradient(y, x)
@@ -170,7 +170,7 @@ class LossScalingGradientTapeTest(test.TestCase, parameterized.TestCase):
   def test_dynamic_loss_scaling_inf_target_post_scale(self):
     loss_scale = loss_scale_module.DynamicLossScale(initial_loss_scale=32.0)
     x = constant_op.constant(3.0 * (10**37))
-    with lsgt.LossScalingGradientTape(loss_scale) as g:
+    with lsgt.LossScaleGradientTape(loss_scale) as g:
       g.watch(x)
       y = x * 3.0  # target will be inf after scaling
     dy_dx = g.gradient(y, x)
@@ -1,6 +1,6 @@
-path: "tensorflow.mixed_precision.experimental.LossScalingGradientTape"
+path: "tensorflow.mixed_precision.experimental.LossScaleGradientTape"
 tf_class {
-  is_instance: "<class \'tensorflow.python.training.experimental.loss_scaling_gradient_tape.LossScalingGradientTape\'>"
+  is_instance: "<class \'tensorflow.python.training.experimental.loss_scaling_gradient_tape.LossScaleGradientTape\'>"
   is_instance: "<class \'tensorflow.python.eager.backprop.GradientTape\'>"
   is_instance: "<type \'object\'>"
   member_method {

@@ -1,7 +1,7 @@
 path: "tensorflow.mixed_precision.experimental"
 tf_module {
   member {
-    name: "LossScalingGradientTape"
+    name: "LossScaleGradientTape"
     mtype: "<type \'type\'>"
   }
 }