fix docs for the rest of the training module
parent 8d8ea6b0bd
commit 593272265d
@@ -29,8 +29,10 @@ from tensorflow.python.util.tf_export import tf_export
class AdadeltaOptimizer(optimizer.Optimizer):
"""Optimizer that implements the Adadelta algorithm.

See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
References:
ADADELTA - An Adaptive Learning Rate Method:
[Zeiler, 2012](http://arxiv.org/abs/1212.5701)
([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
"""

def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
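Not part of the diff: a minimal usage sketch for the Adadelta optimizer documented above, assuming the TF1-style `tf.compat.v1.train` API under eager execution (where the v1 `minimize` takes a callable loss); the variable and loss are made-up toy values.

```python
import tensorflow as tf

# Toy variable and loss; in eager mode the v1 optimizers take a callable loss.
w = tf.Variable(5.0)
loss = lambda: (w - 3.0) ** 2

opt = tf.compat.v1.train.AdadeltaOptimizer(learning_rate=0.001, rho=0.95, epsilon=1e-8)
for _ in range(10):
  opt.minimize(loss, var_list=[w])  # applies one Adadelta update per call
```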
@@ -32,9 +32,10 @@ from tensorflow.python.util.tf_export import tf_export
class AdagradOptimizer(optimizer.Optimizer):
"""Optimizer that implements the Adagrad algorithm.

See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
or this
[intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
References:
Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
[Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
"""

def __init__(self, learning_rate, initial_accumulator_value=0.1,
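Likewise illustrative only: unlike Adadelta, Adagrad's constructor has no default `learning_rate`, and `initial_accumulator_value` seeds the per-parameter accumulators. The toy loss below is an assumption, not part of the commit.

```python
import tensorflow as tf

w = tf.Variable([0.5, -0.5])
loss = lambda: tf.reduce_sum(tf.square(w))

# learning_rate is required; initial_accumulator_value must be positive.
opt = tf.compat.v1.train.AdagradOptimizer(learning_rate=0.1, initial_accumulator_value=0.1)
opt.minimize(loss, var_list=[w])
```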
@@ -30,8 +30,6 @@ from tensorflow.python.util.tf_export import tf_export
class AdagradDAOptimizer(optimizer.Optimizer):
"""Adagrad Dual Averaging algorithm for sparse linear models.

See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf).

This optimizer takes care of regularization of unseen features in a mini batch
by updating them when they are seen with a closed form update rule that is
equivalent to having updated them on every mini-batch.
@@ -40,6 +38,11 @@ class AdagradDAOptimizer(optimizer.Optimizer):
trained model. This optimizer only guarantees sparsity for linear models. Be
careful when using AdagradDA for deep networks as it will require careful
initialization of the gradient accumulators for it to train.

References:
Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
[Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
"""

def __init__(self,
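A hedged sketch of how AdagradDA differs in usage: its constructor takes the current `global_step`, which it uses for the closed-form update of unseen features described above. Graph mode with a `tf.compat.v1.Session` is assumed here; the variable, loss, and regularization strength are illustrative.

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# AdagradDA needs the current global step to apply its closed-form update
# for features that were not seen in recent mini-batches.
global_step = tf.compat.v1.train.get_or_create_global_step()
w = tf.compat.v1.get_variable("w", shape=[3], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))

opt = tf.compat.v1.train.AdagradDAOptimizer(
    learning_rate=0.1,
    global_step=global_step,
    l1_regularization_strength=0.01)
train_op = opt.minimize(loss, global_step=global_step)

with tf.compat.v1.Session() as sess:
  sess.run(tf.compat.v1.global_variables_initializer())
  sess.run(train_op)  # one AdagradDA step; global_step is incremented by minimize
```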
@@ -32,8 +32,10 @@ from tensorflow.python.util.tf_export import tf_export
class AdamOptimizer(optimizer.Optimizer):
"""Optimizer that implements the Adam algorithm.

See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
References:
Adam - A Method for Stochastic Optimization:
[Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
([pdf](https://arxiv.org/pdf/1412.6980.pdf))
"""

def __init__(self,
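For illustration (not in the diff), the two-step `compute_gradients` / `apply_gradients` path with Adam's moment-decay arguments, again assuming the `tf.compat.v1.train` API under eager execution with a made-up loss.

```python
import tensorflow as tf

w = tf.Variable(tf.zeros([2]))
loss = lambda: tf.reduce_sum(tf.square(w - tf.constant([1.0, -1.0])))

# beta1/beta2 are the decay rates for the first- and second-moment estimates.
opt = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8)
grads_and_vars = opt.compute_gradients(loss, var_list=[w])  # list of (gradient, variable) pairs
opt.apply_gradients(grads_and_vars)                          # applies one Adam update
```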
@@ -29,11 +29,14 @@ from tensorflow.python.util.tf_export import tf_export
class FtrlOptimizer(optimizer.Optimizer):
"""Optimizer that implements the FTRL algorithm.

See this [paper](
https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf).
This version has support for both online L2 (the L2 penalty given in the paper
above) and shrinkage-type L2 (which is the addition of an L2 penalty to the
loss function).
This version has support for both online L2 (McMahan et al., 2013) and
shrinkage-type L2, which is the addition of an L2 penalty
to the loss function.

References:
Ad-click prediction:
[McMahan et al., 2013](https://dl.acm.org/citation.cfm?id=2488200)
([pdf](https://dl.acm.org/ft_gateway.cfm?id=2488200&ftid=1388399&dwn=1&CFID=32233078&CFTOKEN=d60fe57a294c056a-CB75C374-F915-E7A6-1573FBBC7BF7D526))
"""

def __init__(self,
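As an aside, the online-vs-shrinkage distinction drawn in the docstring above corresponds, in the TF1-style constructor, to `l2_regularization_strength` and `l2_shrinkage_regularization_strength`. The sketch below is an assumption about usage, not part of the commit.

```python
import tensorflow as tf

w = tf.Variable([0.3, -0.2, 0.0])
loss = lambda: tf.reduce_sum(tf.square(w))

opt = tf.compat.v1.train.FtrlOptimizer(
    learning_rate=0.05,
    l1_regularization_strength=0.01,             # encourages exact zeros (sparsity)
    l2_regularization_strength=0.01,             # online L2 from McMahan et al., 2013
    l2_shrinkage_regularization_strength=0.01)   # shrinkage-type L2 added to the loss
opt.minimize(loss, var_list=[w])
```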
@@ -53,8 +56,7 @@ class FtrlOptimizer(optimizer.Optimizer):
learning_rate: A float value or a constant float `Tensor`.
learning_rate_power: A float value, must be less or equal to zero.
Controls how the learning rate decreases during training. Use zero for
a fixed learning rate. See section 3.1 in the
[paper](https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf).
a fixed learning rate. See section 3.1 in (McMahan et al., 2013).
initial_accumulator_value: The starting value for accumulators.
Only zero or positive values are allowed.
l1_regularization_strength: A float value, must be greater than or
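Illustrative only, assuming the documented default of `learning_rate_power=-0.5`: zero gives a fixed effective learning rate, while negative powers let it decay as the accumulators grow (section 3.1 in McMahan et al., 2013).

```python
import tensorflow as tf

# Default power (assumed -0.5): per-coordinate learning rate decays as accumulators grow.
decaying = tf.compat.v1.train.FtrlOptimizer(learning_rate=0.05)

# Zero power: the effective learning rate stays fixed during training.
fixed = tf.compat.v1.train.FtrlOptimizer(learning_rate=0.05, learning_rate_power=0.0)
```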
@@ -84,6 +86,11 @@ class FtrlOptimizer(optimizer.Optimizer):

Raises:
ValueError: If one of the arguments is invalid.

References:
Ad-click prediction:
[McMahan et al., 2013](https://dl.acm.org/citation.cfm?id=2488200)
([pdf](https://dl.acm.org/ft_gateway.cfm?id=2488200&ftid=1388399&dwn=1&CFID=32233078&CFTOKEN=d60fe57a294c056a-CB75C374-F915-E7A6-1573FBBC7BF7D526))
"""
super(FtrlOptimizer, self).__init__(use_locking, name)
@@ -79,7 +79,7 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
A tensor which if evaluated will compute and return the new moving average.

References:
A Method for Stochastic Optimization:
Adam - A Method for Stochastic Optimization:
[Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
([pdf](https://arxiv.org/pdf/1412.6980.pdf))
"""
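A small usage sketch (not from the diff) for the function whose docstring is touched above, importing the internal `moving_averages` module that this file defines; `zero_debias=False` keeps the example minimal, and the zero-debiasing scheme itself follows the Adam reference.

```python
import tensorflow as tf
from tensorflow.python.training import moving_averages  # module edited in this diff

ema_var = tf.Variable(0.0)
for value in [1.0, 2.0, 3.0]:
  # Updates ema_var in place: ema_var = decay * ema_var + (1 - decay) * value
  moving_averages.assign_moving_average(ema_var, value, decay=0.9, zero_debias=False)
```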
@@ -207,7 +207,7 @@ def _zero_debias(unbiased_var, value, decay):
tensor will also update the shadow variables appropriately.

References:
A Method for Stochastic Optimization:
Adam - A Method for Stochastic Optimization:
[Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
([pdf](https://arxiv.org/pdf/1412.6980.pdf))
"""
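For intuition only: the zero-debiasing referenced here is the bias correction from the Adam paper, where a moving average started at zero is divided by `1 - decay**t`. Plain-Python arithmetic below, with a constant signal chosen for illustration.

```python
decay = 0.9
value = 1.0          # constant signal for illustration
biased = 0.0
for t in range(1, 6):
  biased = decay * biased + (1 - decay) * value
  debiased = biased / (1 - decay ** t)   # recovers ~1.0 even at small t
  print(t, round(biased, 4), round(debiased, 4))
```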