From f5866078ee935cad8e0be0879316c22fdd379777 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Mon, 18 Nov 2019 10:49:38 -0800
Subject: [PATCH] Documentation update to reference learning rate schedules in
 the optimizer documentation.

PiperOrigin-RevId: 281104367
Change-Id: Id814018dfb8f21b4d1b46b7d675838c56765b975
---
 tensorflow/python/keras/optimizer_v2/adadelta.py         | 3 ++-
 tensorflow/python/keras/optimizer_v2/adagrad.py          | 3 ++-
 tensorflow/python/keras/optimizer_v2/adam.py             | 3 ++-
 tensorflow/python/keras/optimizer_v2/adamax.py           | 3 ++-
 tensorflow/python/keras/optimizer_v2/ftrl.py             | 3 ++-
 tensorflow/python/keras/optimizer_v2/gradient_descent.py | 3 ++-
 tensorflow/python/keras/optimizer_v2/rmsprop.py          | 3 ++-
 7 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
index c837ad9d0b4..e0d1093690b 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -74,7 +74,8 @@ class Adadelta(optimizer_v2.OptimizerV2):
     learning rate can be set, as in most other Keras optimizers.
 
     Args:
-      learning_rate: A `Tensor` or a floating point value. The learning rate.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
         To match the exact form in the original paper use 1.0.
       rho: A `Tensor` or a floating point value. The decay rate.
       epsilon: A `Tensor` or a floating point value. A constant epsilon used
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
index df085042d81..3eb57abc4c7 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -63,7 +63,8 @@ class Adagrad(optimizer_v2.OptimizerV2):
     """Construct a new Adagrad optimizer.
 
     Args:
-      learning_rate: A `Tensor` or a floating point value. The learning rate.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
       initial_accumulator_value: A floating point value. Starting value for the
         accumulators, must be non-negative.
       epsilon: A small floating point value to avoid zero denominator.
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index bad306a1dfd..b0b0b956c17 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -108,7 +108,8 @@ class Adam(optimizer_v2.OptimizerV2):
       unless a variable slice was actually used).
 
     Args:
-      learning_rate: A Tensor or a floating point value. The learning rate.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
       beta_1: A float value or a constant float tensor. The exponential decay
         rate for the 1st moment estimates.
       beta_2: A float value or a constant float tensor. The exponential decay
diff --git a/tensorflow/python/keras/optimizer_v2/adamax.py b/tensorflow/python/keras/optimizer_v2/adamax.py
index c68d4f95581..bd97d917146 100644
--- a/tensorflow/python/keras/optimizer_v2/adamax.py
+++ b/tensorflow/python/keras/optimizer_v2/adamax.py
@@ -83,7 +83,8 @@ class Adamax(optimizer_v2.OptimizerV2):
       used).
 
     Args:
-      learning_rate: A Tensor or a floating point value. The learning rate.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
       beta_1: A float value or a constant float tensor. The exponential decay
         rate for the 1st moment estimates.
       beta_2: A float value or a constant float tensor. The exponential decay
diff --git a/tensorflow/python/keras/optimizer_v2/ftrl.py b/tensorflow/python/keras/optimizer_v2/ftrl.py
index 86ec5b4166a..b893271f805 100644
--- a/tensorflow/python/keras/optimizer_v2/ftrl.py
+++ b/tensorflow/python/keras/optimizer_v2/ftrl.py
@@ -66,7 +66,8 @@ class Ftrl(optimizer_v2.OptimizerV2):
     r"""Construct a new FTRL optimizer.
 
     Args:
-      learning_rate: A float value or a constant float `Tensor`.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
       learning_rate_power: A float value, must be less or equal to zero.
         Controls how the learning rate decreases during training. Use zero for
         a fixed learning rate.
diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent.py b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
index 97de0754bad..33de558addc 100644
--- a/tensorflow/python/keras/optimizer_v2/gradient_descent.py
+++ b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
@@ -69,7 +69,8 @@ class SGD(optimizer_v2.OptimizerV2):
     """Construct a new Stochastic Gradient Descent or Momentum optimizer.
 
     Arguments:
-      learning_rate: float hyperparameter >= 0. Learning rate.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
       momentum: float hyperparameter >= 0 that accelerates SGD in the relevant
         direction and dampens oscillations.
       nesterov: boolean. Whether to apply Nesterov momentum.
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
index 4750494b554..905a9136e96 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -83,7 +83,8 @@ class RMSprop(optimizer_v2.OptimizerV2):
       a particular graph execution), but differs from the published algorithm.
 
     Args:
-      learning_rate: A Tensor or a floating point value. The learning rate.
+      learning_rate: A `Tensor`, floating point value, or a schedule that is a
+        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
      rho: Discounting factor for the history/coming gradient
      momentum: A scalar tensor.
      epsilon: Small value to avoid zero denominator.
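
The sketch below is not part of the patch; it only illustrates what the updated docstrings describe: passing a `tf.keras.optimizers.schedules.LearningRateSchedule` as the `learning_rate` argument of any of the optimizers touched above. The choice of `ExponentialDecay`, the `SGD` optimizer, and the toy model are illustrative assumptions (they assume a TF 2.x install), not details taken from this change.

    # Minimal usage sketch (illustrative, not part of this commit).
    import tensorflow as tf

    # Decay the learning rate from 0.1 by a factor of 0.96 every 1000 steps.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.1,
        decay_steps=1000,
        decay_rate=0.96)

    # The schedule can be passed wherever a fixed float learning rate is
    # accepted; SGD is used here as one example of the optimizers above.
    optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

    # Toy model, assumed purely for demonstration.
    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer=optimizer, loss='mse')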