From 7d58fd5675db8577d230c4989adc61092249e568 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Sat, 2 Mar 2019 10:36:31 -0800 Subject: [PATCH] Improve Adadelta doc. PiperOrigin-RevId: 236469934 --- tensorflow/python/keras/optimizer_v2/adadelta.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py index a3d5538ea86..1ceb9332824 100644 --- a/tensorflow/python/keras/optimizer_v2/adadelta.py +++ b/tensorflow/python/keras/optimizer_v2/adadelta.py @@ -41,13 +41,14 @@ class Adadelta(optimizer_v2.OptimizerV2): Initialization: - $$accum_g_0 := 0 \text{(Initialize gradient 2nd order moment vector)}$$ - $$accum_x_0 := 0 \text{(Initialize variable update 2nd order moment vector)}$$ + $$E[g^2]_0 := 0 \text{(Initialize gradient 2nd order moment vector)}$$ + $$E[\Delta x^2]_0 := 0 \text{(Initialize 2nd order variable update)}$$ $$t := t + 1$$ - $$accum_g_t := rho * accum_g_{t-1} + (1 - rho) * g * g$$ - $$delta = -\sqrt{accum_x_{t-1}} / (\sqrt{accum_g_{t-1}} + \epsilon)$$ - $$accum_x_t := rho * accum_x_{t-1} + (1 - rho) * delta * delta$$ + $$E[g^2]_t := \rho * E[g^2]_{t-1} + (1 - \rho) * g^2$$ + $$\Delta x_t = -RMS[\Delta x]_{t-1} * g_t / RMS[g]_t$$ + $$E[\Delta x^2]_t := \rho * E[\Delta x^2]_{t-1} + (1 - \rho) * \Delta x_t^2$$ + $$x_t := x_{t-1} + \Delta x_{t}$$ References See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)