Graduate to core the following experimental learning rate schedules: CosineDecay, CosineDecayRestarts.

Remove from the API the following experimental learning rate schedules: LinearCosineDecay, NoisyLinearCosineDecay.

PiperOrigin-RevId: 350704711
Change-Id: Iebe9bc0eff38f79684e8d2f030fd838d06176494
Francois Chollet 2021-01-07 22:49:11 -08:00 committed by TensorFlower Gardener
parent d40e992f9f
commit 33910c7d76
14 changed files with 91 additions and 118 deletions
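For context, a minimal usage sketch of the graduated API (values are illustrative; per the dual `keras_export` in the diff below, the old `tf.keras.experimental.*` names remain available as aliases):

```python
import tensorflow as tf

# New canonical locations for the two graduated schedules.
cosine = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.1, decay_steps=10000)
restarts = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=0.1, first_decay_steps=1000)

# A schedule can be passed wherever an optimizer accepts a learning rate.
opt = tf.keras.optimizers.SGD(learning_rate=cosine)
```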


@@ -36,6 +36,9 @@
 * Improvements to model saving/loading:
 * `model.load_weights` now accepts paths to saved models.
 * Keras inputs can now be created directly from arbitrary `tf.TypeSpecs`.
+* Two new learning rate schedules added:
+  `tf.keras.optimizers.schedules.CosineDecay` and
+  `tf.keras.optimizers.schedules.CosineDecayRestarts`.
 * `tf.data`:
 * Exposing `tf.data.experimental.ExternalStatePolicy`, which can be used


@@ -65,7 +65,7 @@ class LearningRateSchedule(object):
 class ExponentialDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses an exponential decay schedule.

-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies an exponential decay function
   to an optimizer step, given a provided initial learning rate.
@@ -416,7 +416,7 @@ class PolynomialDecay(LearningRateSchedule):
 class InverseTimeDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses an inverse time decay schedule.

-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies the inverse decay function
   to an optimizer step, given a provided initial learning rate.
   It requires a `step` value to compute the decayed learning rate. You can
@@ -518,14 +518,15 @@ class InverseTimeDecay(LearningRateSchedule):
     }


-@keras_export("keras.experimental.CosineDecay")
+@keras_export("keras.optimizers.schedules.CosineDecay",
+              "keras.experimental.CosineDecay")
 class CosineDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses a cosine decay schedule.

-  See [Loshchilov & Hutter, ICLR2016], SGDR: Stochastic Gradient Descent
-  with Warm Restarts. https://arxiv.org/abs/1608.03983
+  See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
+  SGDR: Stochastic Gradient Descent with Warm Restarts.

-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies a cosine decay function
   to an optimizer step, given a provided initial learning rate.
   It requires a `step` value to compute the decayed learning rate. You can
@@ -547,7 +548,7 @@ class CosineDecay(LearningRateSchedule):

   Example usage:
   ```python
   decay_steps = 1000
-  lr_decayed_fn = tf.keras.experimental.CosineDecay(
+  lr_decayed_fn = tf.keras.optimizers.schedules.CosineDecay(
       initial_learning_rate, decay_steps)
   ```
@@ -611,14 +612,15 @@ class CosineDecay(LearningRateSchedule):
     }


-@keras_export("keras.experimental.CosineDecayRestarts")
+@keras_export("keras.optimizers.schedules.CosineDecayRestarts",
+              "keras.experimental.CosineDecayRestarts")
 class CosineDecayRestarts(LearningRateSchedule):
   """A LearningRateSchedule that uses a cosine decay schedule with restarts.

-  See [Loshchilov & Hutter, ICLR2016], SGDR: Stochastic Gradient Descent
-  with Warm Restarts. https://arxiv.org/abs/1608.03983
+  See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
+  SGDR: Stochastic Gradient Descent with Warm Restarts.

-  When training a model, it is often recommended to lower the learning rate as
+  When training a model, it is often useful to lower the learning rate as
   the training progresses. This schedule applies a cosine decay function with
   restarts to an optimizer step, given a provided initial learning rate.
   It requires a `step` value to compute the decayed learning rate. You can
@@ -637,7 +639,7 @@ class CosineDecayRestarts(LearningRateSchedule):
   ```python
   first_decay_steps = 1000
   lr_decayed_fn = (
-      tf.keras.experimental.CosineDecayRestarts(
+      tf.keras.optimizers.schedules.CosineDecayRestarts(
           initial_learning_rate,
           first_decay_steps))
   ```
@@ -737,7 +739,7 @@ class CosineDecayRestarts(LearningRateSchedule):
     }


-@keras_export("keras.experimental.LinearCosineDecay")
+# Note: this code is still used by V1 APIs.
 class LinearCosineDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses a linear cosine decay schedule.
@@ -855,7 +857,7 @@ class LinearCosineDecay(LearningRateSchedule):
     }


-@keras_export("keras.experimental.NoisyLinearCosineDecay")
+# Note: this code is still used by V1 APIs.
 class NoisyLinearCosineDecay(LearningRateSchedule):
   """A LearningRateSchedule that uses a noisy linear cosine decay schedule.


@@ -433,81 +433,5 @@ class CosineDecayRestartsTestV2(test_util.TensorFlowTestCase,
       self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)


-@combinations.generate(combinations.combine(serialize=[False, True],
-                                            mode=["graph", "eager"]))
-class LinearCosineDecayTestV2(test_util.TensorFlowTestCase,
-                              parameterized.TestCase):
-
-  def np_linear_cosine_decay(self,
-                             step,
-                             decay_steps,
-                             alpha=0.0,
-                             beta=0.001,
-                             num_periods=0.5):
-    step = min(step, decay_steps)
-    linear_decayed = float(decay_steps - step) / decay_steps
-    fraction = 2.0 * num_periods * step / float(decay_steps)
-    cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
-    return (alpha + linear_decayed) * cosine_decayed + beta
-
-  def testDefaultDecay(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      decayed_lr = learning_rate_schedule.LinearCosineDecay(
-          initial_lr, num_training_steps)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      expected = self.np_linear_cosine_decay(step, num_training_steps)
-      self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)
-
-  def testNonDefaultDecay(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      decayed_lr = learning_rate_schedule.LinearCosineDecay(
-          initial_lr,
-          num_training_steps,
-          alpha=0.1,
-          beta=1e-4,
-          num_periods=5)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      expected = self.np_linear_cosine_decay(
-          step, num_training_steps, alpha=0.1, beta=1e-4, num_periods=5)
-      self.assertAllClose(self.evaluate(decayed_lr(step)), expected, 1e-6)
-
-
-@combinations.generate(combinations.combine(serialize=[False, True],
-                                            mode=["graph", "eager"]))
-class NoisyLinearCosineDecayTestV2(test_util.TensorFlowTestCase,
-                                   parameterized.TestCase):
-
-  def testDefaultNoisyLinearCosine(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      # No numerical check because of noise
-      decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay(
-          initial_lr, num_training_steps)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      # Cannot be deterministically tested
-      self.evaluate(decayed_lr(step))
-
-  def testNonDefaultNoisyLinearCosine(self, serialize):
-    num_training_steps = 1000
-    initial_lr = 1.0
-    for step in range(0, 1500, 250):
-      # No numerical check because of noise
-      decayed_lr = learning_rate_schedule.NoisyLinearCosineDecay(
-          initial_lr,
-          num_training_steps,
-          initial_variance=0.5,
-          variance_decay=0.1,
-          alpha=0.1,
-          beta=1e-4,
-          num_periods=5)
-      decayed_lr = _maybe_serialized(decayed_lr, serialize)
-      # Cannot be deterministically tested
-      self.evaluate(decayed_lr(step))
-
-
 if __name__ == "__main__":
   test.main()
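The `np_*` reference helpers above leave with the removed classes. For the restart schedule that stays in core, a NumPy-style reference in the same spirit might look like this (my reading of the SGDR formula, not code from this commit):

```python
import math

def np_cosine_decay_restarts(step, first_decay_steps, t_mul=2.0, m_mul=1.0,
                             alpha=0.0):
  # Returns the multiplier applied to initial_learning_rate. Each restart
  # period is t_mul times longer than the last and begins at m_mul times the
  # previous amplitude; alpha is the decay floor within each period.
  fraction = step / first_decay_steps
  if t_mul == 1.0:
    i_restart = math.floor(fraction)
    fraction -= i_restart
  else:
    # Invert the geometric series of period lengths to find the restart index.
    i_restart = math.floor(
        math.log(1.0 - fraction * (1.0 - t_mul)) / math.log(t_mul))
    sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
    fraction = (fraction - sum_r) / t_mul**i_restart
  cosine = 0.5 * (m_mul**i_restart) * (1.0 + math.cos(math.pi * fraction))
  return (1.0 - alpha) * cosine + alpha
```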


@@ -8,18 +8,10 @@ tf_module {
     name: "CosineDecayRestarts"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "LinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "LinearModel"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "NoisyLinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "PeepholeLSTMCell"
     mtype: "<type \'type\'>"


@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.NoisyLinearCosineDecay"
+path: "tensorflow.keras.optimizers.schedules.CosineDecayRestarts"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.NoisyLinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecayRestarts\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'initial_variance\', \'variance_decay\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'0.55\', \'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"


@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.LinearCosineDecay"
+path: "tensorflow.keras.optimizers.schedules.CosineDecay"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecay\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"


@@ -1,5 +1,13 @@
 path: "tensorflow.keras.optimizers.schedules"
 tf_module {
+  member {
+    name: "CosineDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineDecayRestarts"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ExponentialDecay"
     mtype: "<type \'type\'>"


@@ -8,18 +8,10 @@ tf_module {
     name: "CosineDecayRestarts"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "LinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "LinearModel"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "NoisyLinearCosineDecay"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "PeepholeLSTMCell"
     mtype: "<type \'type\'>"


@@ -0,0 +1,18 @@
+path: "tensorflow.keras.optimizers.schedules.CosineDecayRestarts"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecayRestarts\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
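Read back from the golden argspec above, constructing the newly exported class looks like this (a hedged sketch; parameter meanings per the SGDR docstring):

```python
import tensorflow as tf

schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(
    initial_learning_rate=0.1,
    first_decay_steps=1000,
    t_mul=2.0,   # default: each restart period doubles in length
    m_mul=1.0,   # default: restarts begin at the full previous amplitude
    alpha=0.0)   # default: decay all the way to zero within a period

# LearningRateSchedule objects are callable on the step, as the tests above
# exercise via decayed_lr(step).
lr_at_step_500 = schedule(500)
```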


@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.LinearCosineDecay"
+path: "tensorflow.keras.optimizers.schedules.CosineDecay"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecay\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"


@@ -1,5 +1,13 @@
 path: "tensorflow.keras.optimizers.schedules"
 tf_module {
+  member {
+    name: "CosineDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineDecayRestarts"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ExponentialDecay"
     mtype: "<type \'type\'>"


@@ -0,0 +1,18 @@
+path: "tensorflow.optimizers.schedules.CosineDecayRestarts"
+tf_class {
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecayRestarts\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'first_decay_steps\', \'t_mul\', \'m_mul\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'2.0\', \'1.0\', \'0.0\', \'None\'], "
+  }
+  member_method {
+    name: "from_config"
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "get_config"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}


@@ -1,11 +1,11 @@
-path: "tensorflow.keras.experimental.NoisyLinearCosineDecay"
+path: "tensorflow.optimizers.schedules.CosineDecay"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.NoisyLinearCosineDecay\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.CosineDecay\'>"
   is_instance: "<class \'tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule\'>"
   is_instance: "<type \'object\'>"
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'initial_variance\', \'variance_decay\', \'num_periods\', \'alpha\', \'beta\', \'name\'], varargs=None, keywords=None, defaults=[\'1.0\', \'0.55\', \'0.5\', \'0.0\', \'0.001\', \'None\'], "
+    argspec: "args=[\'self\', \'initial_learning_rate\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
   }
   member_method {
     name: "from_config"


@@ -1,5 +1,13 @@
 path: "tensorflow.optimizers.schedules"
 tf_module {
+  member {
+    name: "CosineDecay"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CosineDecayRestarts"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ExponentialDecay"
     mtype: "<type \'type\'>"