Graduate the following experimental learning rate schedules to core: CosineDecay, CosineDecayRestarts.
Remove the following experimental learning rate schedules from the API: LinearCosineDecay, NoisyLinearCosineDecay.

PiperOrigin-RevId: 350704711
Change-Id: Iebe9bc0eff38f79684e8d2f030fd838d06176494
parent d40e992f9f
commit 33910c7d76
Changed files:

RELEASE.md
tensorflow/python/keras/optimizer_v2
tensorflow/tools/api/golden/v1:
  tensorflow.keras.experimental.pbtxt
  tensorflow.keras.optimizers.schedules.-cosine-decay-restarts.pbtxt
  tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt
  tensorflow.keras.optimizers.schedules.pbtxt
tensorflow/tools/api/golden/v2:
  tensorflow.keras.experimental.pbtxt
  tensorflow.keras.optimizers.schedules.-cosine-decay-restarts.pbtxt
  tensorflow.keras.optimizers.schedules.-cosine-decay.pbtxt
  tensorflow.keras.optimizers.schedules.pbtxt
  tensorflow.optimizers.schedules.-cosine-decay-restarts.pbtxt
  tensorflow.optimizers.schedules.-cosine-decay.pbtxt
  tensorflow.optimizers.schedules.pbtxt
@@ -36,6 +36,9 @@
 * Improvements to model saving/loading:
     * `model.load_weights` now accepts paths to saved models.
 * Keras inputs can now be created directly from arbitrary `tf.TypeSpecs`.
+* Two new learning rate schedules added:
+  `tf.keras.optimizers.schedules.CosineDecay` and
+  `tf.keras.optimizers.schedules.CosineDecayRestarts`.
 * `tf.data`:
     * Exposing `tf.data.experimental.ExternalStatePolicy`, which can be used
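For context: with this change both schedules are importable from their core namespace. A minimal sketch, assuming a TF build that includes this commit (the optimizer wiring is shown only for illustration):

```python
import tensorflow as tf

decay_steps = 1000
cosine = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.1, decay_steps=decay_steps)
restarts = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=0.1, first_decay_steps=decay_steps)

# A LearningRateSchedule can be passed anywhere a learning rate is expected.
optimizer = tf.keras.optimizers.SGD(learning_rate=cosine)
```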
@@ -65,7 +65,7 @@ class LearningRateSchedule(object):
 class ExponentialDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses an exponential decay schedule.
 
-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies an exponential decay function
   to an optimizer step, given a provided initial learning rate.
 
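For reference, the exponential schedule's documented behavior, paraphrased as plain Python (a sketch of the formula, not the library source; `staircase` handling is simplified):

```python
def exponential_decay(step, initial_learning_rate=0.1, decay_steps=1000,
                      decay_rate=0.96, staircase=False):
    """Reference math for ExponentialDecay: lr * rate ** (step / decay_steps)."""
    exponent = step / decay_steps
    if staircase:
        exponent = int(exponent)  # decay in discrete intervals
    return initial_learning_rate * decay_rate ** exponent
```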
@@ -416,7 +416,7 @@ class PolynomialDecay(LearningRateSchedule):
 class InverseTimeDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses an inverse time decay schedule.
 
-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies the inverse decay function
   to an optimizer step, given a provided initial learning rate.
   It requires a `step` value to compute the decayed learning rate. You can
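Similarly, the inverse time schedule computes, in effect (a sketch of the documented formula):

```python
def inverse_time_decay(step, initial_learning_rate=0.1, decay_steps=1000,
                       decay_rate=0.5, staircase=False):
    """Reference math for InverseTimeDecay: lr / (1 + rate * step / decay_steps)."""
    progress = step / decay_steps
    if staircase:
        progress = int(progress)  # decay in discrete intervals
    return initial_learning_rate / (1 + decay_rate * progress)
```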
@@ -518,14 +518,15 @@ class InverseTimeDecay(LearningRateSchedule):
     }
 
 
-@keras_export("keras.experimental.CosineDecay")
+@keras_export("keras.optimizers.schedules.CosineDecay",
+              "keras.experimental.CosineDecay")
 class CosineDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses a cosine decay schedule.
 
-  See [Loshchilov & Hutter, ICLR2016], SGDR: Stochastic Gradient Descent
-  with Warm Restarts. https://arxiv.org/abs/1608.03983
+  See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
+  SGDR: Stochastic Gradient Descent with Warm Restarts.
 
-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies a cosine decay function
   to an optimizer step, given a provided initial learning rate.
   It requires a `step` value to compute the decayed learning rate. You can
@@ -547,7 +548,7 @@ class CosineDecay(LearningRateSchedule):
   Example usage:
   ```python
   decay_steps = 1000
-  lr_decayed_fn = tf.keras.experimental.CosineDecay(
+  lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(
       initial_learning_rate, decay_steps)
   ```
 
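The cosine schedule itself reduces to the following reference math (a sketch matching the docstring's description; `alpha` is the floor, as a fraction of the initial rate):

```python
import math

def cosine_decay(step, initial_learning_rate, decay_steps, alpha=0.0):
    """Reference math for CosineDecay."""
    step = min(step, decay_steps)
    cosine = 0.5 * (1.0 + math.cos(math.pi * step / decay_steps))
    decayed = (1 - alpha) * cosine + alpha
    return initial_learning_rate * decayed
```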
@@ -611,14 +612,15 @@ class CosineDecay(LearningRateSchedule):
     }
 
 
-@keras_export("keras.experimental.CosineDecayRestarts")
+@keras_export("keras.optimizers.schedules.CosineDecayRestarts",
+              "keras.experimental.CosineDecayRestarts")
 class CosineDecayRestarts(LearningRateSchedule):
   """A LearningRateSchedule that uses a cosine decay schedule with restarts.
 
-  See [Loshchilov & Hutter, ICLR2016], SGDR: Stochastic Gradient Descent
-  with Warm Restarts. https://arxiv.org/abs/1608.03983
+  See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
+  SGDR: Stochastic Gradient Descent with Warm Restarts.
 
-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies a cosine decay function with
   restarts to an optimizer step, given a provided initial learning rate.
   It requires a `step` value to compute the decayed learning rate. You can
@@ -637,7 +639,7 @@ class CosineDecayRestarts(LearningRateSchedule):
   ```python
   first_decay_steps = 1000
   lr_decayed_fn = (
-      tf.keras.experimental.CosineDecayRestarts(
+      tf.keras.optimizers.schedules.CosineDecayRestarts(
          initial_learning_rate,
          first_decay_steps))
   ```
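As above, the restart schedule plugs straight into an optimizer; a sketch with every constructor argument spelled out (values illustrative; argument names match the golden argspecs below):

```python
import tensorflow as tf

lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=0.1,
    first_decay_steps=1000,
    t_mul=2.0,   # each successive restart cycle lasts twice as long
    m_mul=1.0,   # each cycle restarts at the same peak learning rate
    alpha=0.0)   # decay to zero within each cycle
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_decayed_fn)
```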
@@ -737,7 +739,7 @@ class CosineDecayRestarts(LearningRateSchedule):
     }
 
 
-@keras_export("keras.experimental.LinearCosineDecay")
+# Note: this code is still used by V1 APIs.
 class LinearCosineDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses a linear cosine decay schedule.
 
@@ -855,7 +857,7 @@ class LinearCosineDecay(LearningRateSchedule):
     }
 
 
-@keras_export("keras.experimental.NoisyLinearCosineDecay")
+# Note: this code is still used by V1 APIs.
 class NoisyLinearCosineDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses a noisy linear cosine decay schedule.
 
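Per the notes above, both classes stay alive for V1 use even though their Keras exports are removed. Assuming the long-standing `tf.compat.v1.train` wrappers remain exported, V1-style code can still reach the linear cosine behavior roughly like this (a sketch, not confirmed by this diff):

```python
import tensorflow as tf

# Assumed V1-style usage; linear_cosine_decay wraps the LinearCosineDecay class.
global_step = tf.compat.v1.train.get_or_create_global_step()
lr = tf.compat.v1.train.linear_cosine_decay(
    learning_rate=0.1, global_step=global_step, decay_steps=1000)
```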
@@ -433,81 +433,5 @@ class CosineDecayRestartsTestV2(test_util.TensorFlowTestCase,
       self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)
 
 
-@combinations.generate(combinations.combine(serialize=[False, True],
-                                            mode=["graph", "eager"]))
-class LinearCosineDecayTestV2(test_util.TensorFlowTestCase,
-                              parameterized.TestCase):
-
-  def np_linear_cosine_decay(self,
-                             step,
-                             decay_steps,
-                             alpha=0.0,
-                             beta=0.001,
-                             num_periods=0.5):
-    step = min(step, decay_steps)
-    linear_decayed = float(decay_steps - step) / decay_steps
-    fraction = 2.0 * num_periods * step / float(decay_steps)
-    cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
-    return (alpha + linear_decayed) * cosine_decayed + beta
-
-  def testDefaultDecay(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      decayed_lr = learning_rate_schedule.LinearCosineDecay(
-          initial_lr, num_training_steps)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      expected = self.np_linear_cosine_decay(step, num_training_steps)
-      self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)
-
-  def testNonDefaultDecay(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      decayed_lr = learning_rate_schedule.LinearCosineDecay(
-          initial_lr,
-          num_training_steps,
-          alpha=0.1,
-          beta=1e-4,
-          num_periods=5)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      expected = self.np_linear_cosine_decay(
-          step, num_training_steps, alpha=0.1, beta=1e-4, num_periods=5)
-      self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)
-
-
-@combinations.generate(combinations.combine(serialize=[False, True],
-                                            mode=["graph", "eager"]))
-class NoisyLinearCosineDecayTestV2(test_util.TensorFlowTestCase,
-                                   parameterized.TestCase):
-
-  def testDefaultNoisyLinearCosine(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      # No numerical check because of noise
-      decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay(
-          initial_lr, num_training_steps)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      # Cannot be deterministically tested
-      self.evaluate(decayed_lr(step))
-
-  def testNonDefaultNoisyLinearCosine(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      # No numerical check because of noise
-      decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay(
-          initial_lr,
-          num_training_steps,
-          initial_variance=0.5,
-          variance_decay=0.1,
-          alpha=0.1,
-          beta=1e-4,
-          num_periods=5)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      # Cannot be deterministically tested
-      self.evaluate(decayed_lr(step))
-
-
 if __name__ == "__main__":
   test.main()
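The deleted tests reference a `_maybe_serialized` helper that isn't shown in this hunk; a plausible shape, using the module's public serialize/deserialize round trip, would be (an assumption, not the file's actual code):

```python
from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule

def _maybe_serialized(lr_decay, serialize_and_deserialize):
    """Optionally round-trip a schedule through its serialized config."""
    if serialize_and_deserialize:
        serialized = learning_rate_schedule.serialize(lr_decay)
        return learning_rate_schedule.deserialize(serialized)
    return lr_decay
```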
@@ -8,18 +8,10 @@ tf_module {
     name: "CosineDecayRestarts"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "LinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "LinearModel"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "NoisyLinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "PeepholeLSTMCell"
     mtype: "<type \'type\'>"
@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.NoisyLinearCosineDecay"
+path: "tensorflow.keras.optimizers.schedules.CosineDecayRestarts"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.NoisyLinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecayRestarts\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'initial_variance\', \'variance_decay\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'0.55\', \'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"
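The golden also pins `get_config`/`from_config`, so a config round trip is part of the public contract; for example:

```python
import tensorflow as tf

schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=0.1, first_decay_steps=1000)
config = schedule.get_config()  # plain dict, per the golden's member_method list
restored = tf.keras.optimizers.schedules.CosineDecayRestarts.from_config(config)
```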
@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.LinearCosineDecay"
+path: "tensorflow.keras.optimizers.schedules.CosineDecay"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecay\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"
@@ -1,5 +1,13 @@
 path: "tensorflow.keras.optimizers.schedules"
 tf_module {
+  member {
+    name: "CosineDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineDecayRestarts"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ExponentialDecay"
     mtype: "<type \'type\'>"
@@ -8,18 +8,10 @@ tf_module {
     name: "CosineDecayRestarts"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "LinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "LinearModel"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "NoisyLinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "PeepholeLSTMCell"
     mtype: "<type \'type\'>"
@@ -0,0 +1,18 @@
+path: "tensorflow.keras.optimizers.schedules.CosineDecayRestarts"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecayRestarts\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.LinearCosineDecay"
+path: "tensorflow.keras.optimizers.schedules.CosineDecay"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecay\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"
@@ -1,5 +1,13 @@
 path: "tensorflow.keras.optimizers.schedules"
 tf_module {
+  member {
+    name: "CosineDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineDecayRestarts"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ExponentialDecay"
     mtype: "<type \'type\'>"
@@ -0,0 +1,18 @@
+path: "tensorflow.optimizers.schedules.CosineDecayRestarts"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecayRestarts\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.NoisyLinearCosineDecay"
+path: "tensorflow.optimizers.schedules.CosineDecay"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.NoisyLinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecay\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'initial_variance\', \'variance_decay\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'0.55\', \'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"
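Note the `tf.optimizers.schedules` path in these v2 goldens: it aliases `tf.keras.optimizers.schedules`, so the graduated classes are reachable either way, e.g.:

```python
import tensorflow as tf

schedule = tf.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.1, decay_steps=1000, alpha=0.1)
lr_at_500 = schedule(500)  # decayed learning rate at step 500 (scalar tensor)
```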
@@ -1,5 +1,13 @@
 path: "tensorflow.optimizers.schedules"
 tf_module {
+  member {
+    name: "CosineDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineDecayRestarts"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ExponentialDecay"
     mtype: "<type \'type\'>"