diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index 29d3beea2b4..f9ff0a98e9d 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -32,16 +32,16 @@ class Adam(optimizer_v2.OptimizerV2):
   """Optimizer that implements the Adam algorithm.
 
   Adam optimization is a stochastic gradient descent method that is based on
-  adaptive estimation of first-order and second-order moments. According to the
-  reference, the method is 'computationally efficient, has little memory
+  adaptive estimation of first-order and second-order moments.
+  According to the paper
+  [Adam: A Method for Stochastic Optimization. Kingma et al.,
+  2014](http://arxiv.org/abs/1412.6980),
+  the method is "*computationally efficient, has little memory
   requirement, invariant to diagonal rescaling of gradients, and is well suited
-  for problems that are large in terms of data/parameters'.
+  for problems that are large in terms of data/parameters*".
 
-  # References
-    See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
-    ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
-    For AMSGrad see [Reddi et al., 2-18]
-    (https://openreview.net/pdf?id=ryQu7f-RZ)
+  For AMSGrad see [On The Convergence Of Adam And Beyond.
+  Reddi et al., 2018](https://openreview.net/pdf?id=ryQu7f-RZ).
   """
 
   def __init__(self,
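
For reference, a minimal usage sketch of the optimizer whose docstring this diff rewrites, assuming TensorFlow 2.x where the class is exposed as `tf.keras.optimizers.Adam`; it is illustrative only and not part of the change.

```python
import tensorflow as tf

# Standard Adam (Kingma et al., 2014); arguments shown are the documented defaults.
adam = tf.keras.optimizers.Adam(
    learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7)

# AMSGrad variant (Reddi et al., 2018) is selected with the `amsgrad` flag.
adam_amsgrad = tf.keras.optimizers.Adam(learning_rate=0.001, amsgrad=True)

# Apply a single update step to a variable from an explicitly computed gradient.
var = tf.Variable(10.0)
with tf.GradientTape() as tape:
  loss = (var - 3.0) ** 2
grads = tape.gradient(loss, [var])
adam.apply_gradients(zip(grads, [var]))
print(var.numpy())  # slightly below 10.0 after one Adam step
```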