Documentation update to reference learning rate schedules in the optimizer documentation.

PiperOrigin-RevId: 281104367
Change-Id: Id814018dfb8f21b4d1b46b7d675838c56765b975
A. Unique TensorFlower 2019-11-18 10:49:38 -08:00 committed by TensorFlower Gardener
parent ca5a5ef208
commit f5866078ee
7 changed files with 14 additions and 7 deletions
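As an illustration of what the updated docstrings describe (the API already accepts this; the commit only documents it), a `tf.keras.optimizers.schedules.LearningRateSchedule` object can be passed wherever a fixed float learning rate was previously documented. A minimal sketch with illustrative hyperparameter values:

import tensorflow as tf

# Decay the learning rate by ~4% every 1000 steps (values are illustrative).
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=1000,
    decay_rate=0.96)

# The schedule is passed where a float used to go; the optimizer evaluates it
# at each step using its own iteration counter.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)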

@@ -74,7 +74,8 @@ class Adadelta(optimizer_v2.OptimizerV2):
   learning rate can be set, as in most other Keras optimizers.
   Args:
-    learning_rate: A `Tensor` or a floating point value. The learning rate.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
       To match the exact form in the original paper use 1.0.
     rho: A `Tensor` or a floating point value. The decay rate.
     epsilon: A `Tensor` or a floating point value. A constant epsilon used

@@ -63,7 +63,8 @@ class Adagrad(optimizer_v2.OptimizerV2):
   """Construct a new Adagrad optimizer.
   Args:
-    learning_rate: A `Tensor` or a floating point value. The learning rate.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
     initial_accumulator_value: A floating point value.
       Starting value for the accumulators, must be non-negative.
     epsilon: A small floating point value to avoid zero denominator.
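For Adagrad the same pattern applies; a sketch with an assumed (illustrative) polynomial schedule:

import tensorflow as tf

# Anneal linearly from 0.1 to 0.01 over 10,000 steps (illustrative values).
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1,
    decay_steps=10000,
    end_learning_rate=0.01)
optimizer = tf.keras.optimizers.Adagrad(learning_rate=lr_schedule)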

@@ -108,7 +108,8 @@ class Adam(optimizer_v2.OptimizerV2):
   unless a variable slice was actually used).
   Args:
-    learning_rate: A Tensor or a floating point value. The learning rate.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
     beta_1: A float value or a constant float tensor. The exponential decay
       rate for the 1st moment estimates.
     beta_2: A float value or a constant float tensor. The exponential decay
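A step-wise schedule works the same way with Adam; the boundaries and values below are illustrative, not from this commit:

import tensorflow as tf

# Use 1e-3 for the first 100,000 steps, then drop to 1e-4.
lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[100000],
    values=[1e-3, 1e-4])
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)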

@@ -83,7 +83,8 @@ class Adamax(optimizer_v2.OptimizerV2):
   used).
   Args:
-    learning_rate: A Tensor or a floating point value. The learning rate.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
     beta_1: A float value or a constant float tensor. The exponential decay
       rate for the 1st moment estimates.
     beta_2: A float value or a constant float tensor. The exponential decay

@@ -66,7 +66,8 @@ class Ftrl(optimizer_v2.OptimizerV2):
   r"""Construct a new FTRL optimizer.
   Args:
-    learning_rate: A float value or a constant float `Tensor`.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
     learning_rate_power: A float value, must be less or equal to zero.
       Controls how the learning rate decreases during training. Use zero for
       a fixed learning rate.

@@ -69,7 +69,8 @@ class SGD(optimizer_v2.OptimizerV2):
   """Construct a new Stochastic Gradient Descent or Momentum optimizer.
   Arguments:
-    learning_rate: float hyperparameter >= 0. Learning rate.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
     momentum: float hyperparameter >= 0 that accelerates SGD in the relevant
       direction and dampens oscillations.
     nesterov: boolean. Whether to apply Nesterov momentum.
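The documented type also covers user-defined schedules. In the sketch below, the class name and the decay rule are hypothetical; only the `LearningRateSchedule` base class and the `__call__(step)` contract come from the API:

import tensorflow as tf

class InverseSqrtSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Hypothetical user-defined schedule: base_lr / sqrt(step + 1)."""

  def __init__(self, base_lr):
    self.base_lr = base_lr

  def __call__(self, step):
    # The optimizer calls the schedule with its current iteration count.
    return self.base_lr / tf.sqrt(tf.cast(step, tf.float32) + 1.0)

optimizer = tf.keras.optimizers.SGD(
    learning_rate=InverseSqrtSchedule(0.5), momentum=0.9)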

@@ -83,7 +83,8 @@ class RMSprop(optimizer_v2.OptimizerV2):
   a particular graph execution), but differs from the published algorithm.
   Args:
-    learning_rate: A Tensor or a floating point value. The learning rate.
+    learning_rate: A `Tensor`, floating point value, or a schedule that is a
+      `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
     rho: Discounting factor for the history/coming gradient
     momentum: A scalar tensor.
     epsilon: Small value to avoid zero denominator.
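And likewise for RMSprop; a sketch with an assumed inverse-time schedule and illustrative values:

import tensorflow as tf

# initial_learning_rate / (1 + decay_rate * step / decay_steps) decay.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.5)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)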