fix docs for rest training module

mrTsjolder 2018-11-29 22:06:24 +01:00
parent 8d8ea6b0bd
commit 593272265d
6 changed files with 33 additions and 18 deletions

View File

@@ -29,8 +29,10 @@ from tensorflow.python.util.tf_export import tf_export
class AdadeltaOptimizer(optimizer.Optimizer):
"""Optimizer that implements the Adadelta algorithm.
- See [M. D. Zeiler](http://arxiv.org/abs/1212.5701)
- ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
+ References:
+ ADADELTA - An Adaptive Learning Rate Method:
+ [Zeiler, 2012](http://arxiv.org/abs/1212.5701)
+ ([pdf](http://arxiv.org/pdf/1212.5701v1.pdf))
"""
def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-8,
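All of the optimizer classes touched by this commit share the tf.train.Optimizer interface, so a minimal usage sketch for the Adadelta case (TF 1.x graph mode; the toy loss and variable names are invented for illustration) also covers the Adagrad and Adam classes further down:

import tensorflow as tf  # TF 1.x graph-mode API, matching these modules

# Toy quadratic loss over one trainable variable (illustrative only).
w = tf.Variable(5.0, name="w")
loss = tf.square(w - 3.0)

# Defaults mirror the __init__ signature shown above.
opt = tf.train.AdadeltaOptimizer(learning_rate=0.001, rho=0.95, epsilon=1e-8)
train_op = opt.minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(100):
    sess.run(train_op)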

View File

@@ -32,9 +32,10 @@ from tensorflow.python.util.tf_export import tf_export
class AdagradOptimizer(optimizer.Optimizer):
"""Optimizer that implements the Adagrad algorithm.
- See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
- or this
- [intro](https://ppasupat.github.io/a9online/uploads/proximal_notes.pdf).
+ References:
+ Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
+ [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
+ ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
"""
def __init__(self, learning_rate, initial_accumulator_value=0.1,

View File

@@ -30,8 +30,6 @@ from tensorflow.python.util.tf_export import tf_export
class AdagradDAOptimizer(optimizer.Optimizer):
"""Adagrad Dual Averaging algorithm for sparse linear models.
- See this [paper](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf).
This optimizer takes care of regularization of unseen features in a mini batch
by updating them when they are seen with a closed form update rule that is
equivalent to having updated them on every mini-batch.
@@ -40,6 +38,11 @@ class AdagradDAOptimizer(optimizer.Optimizer):
trained model. This optimizer only guarantees sparsity for linear models. Be
careful when using AdagradDA for deep networks as it will require careful
initialization of the gradient accumulators for it to train.
+ References:
+ Adaptive Subgradient Methods for Online Learning and Stochastic Optimization:
+ [Duchi et al., 2011](http://jmlr.org/papers/v12/duchi11a.html)
+ ([pdf](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf))
"""
def __init__(self,
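The sparsity guarantee described above comes from the L1 term in the dual-averaging update, and the constructor additionally needs the global step used in its closed-form rule. A rough sketch under those assumptions (hypothetical linear model and values, TF 1.x graph mode):

import tensorflow as tf

# Hypothetical linear model; the L1 strength is what drives weights to exact zero.
w = tf.Variable(tf.zeros([10]), name="weights")
x = tf.placeholder(tf.float32, [10])
y = tf.placeholder(tf.float32, [])
loss = tf.square(tf.reduce_sum(w * x) - y)

# AdagradDA needs the global step for its closed-form (dual averaging) update.
global_step = tf.train.get_or_create_global_step()
opt = tf.train.AdagradDAOptimizer(
    learning_rate=0.1,
    global_step=global_step,
    l1_regularization_strength=0.01)
train_op = opt.minimize(loss, global_step=global_step)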

View File

@@ -32,8 +32,10 @@ from tensorflow.python.util.tf_export import tf_export
class AdamOptimizer(optimizer.Optimizer):
"""Optimizer that implements the Adam algorithm.
- See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
- ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
+ References:
+ Adam - A Method for Stochastic Optimization:
+ [Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
+ ([pdf](https://arxiv.org/pdf/1412.6980.pdf))
"""
def __init__(self,

View File

@@ -29,11 +29,14 @@ from tensorflow.python.util.tf_export import tf_export
class FtrlOptimizer(optimizer.Optimizer):
"""Optimizer that implements the FTRL algorithm.
- See this [paper](
- https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf).
- This version has support for both online L2 (the L2 penalty given in the paper
- above) and shrinkage-type L2 (which is the addition of an L2 penalty to the
- loss function).
+ This version has support for both online L2 (McMahan et al., 2013) and
+ shrinkage-type L2, which is the addition of an L2 penalty
+ to the loss function.
+ References:
+ Ad-click prediction:
+ [McMahan et al., 2013](https://dl.acm.org/citation.cfm?id=2488200)
+ ([pdf](https://dl.acm.org/ft_gateway.cfm?id=2488200&ftid=1388399&dwn=1&CFID=32233078&CFTOKEN=d60fe57a294c056a-CB75C374-F915-E7A6-1573FBBC7BF7D526))
"""
def __init__(self,
@@ -53,8 +56,7 @@ class FtrlOptimizer(optimizer.Optimizer):
learning_rate: A float value or a constant float `Tensor`.
learning_rate_power: A float value, must be less or equal to zero.
Controls how the learning rate decreases during training. Use zero for
- a fixed learning rate. See section 3.1 in the
- [paper](https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf).
+ a fixed learning rate. See section 3.1 in (McMahan et al., 2013).
initial_accumulator_value: The starting value for accumulators.
Only zero or positive values are allowed.
l1_regularization_strength: A float value, must be greater than or
@@ -84,6 +86,11 @@ class FtrlOptimizer(optimizer.Optimizer):
Raises:
ValueError: If one of the arguments is invalid.
+ References:
+ Ad-click prediction:
+ [McMahan et al., 2013](https://dl.acm.org/citation.cfm?id=2488200)
+ ([pdf](https://dl.acm.org/ft_gateway.cfm?id=2488200&ftid=1388399&dwn=1&CFID=32233078&CFTOKEN=d60fe57a294c056a-CB75C374-F915-E7A6-1573FBBC7BF7D526))
"""
super(FtrlOptimizer, self).__init__(use_locking, name)
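As a rough illustration of the two L2 flavours described in this docstring (TF 1.x graph mode, illustrative values and toy loss): l2_regularization_strength corresponds to the online/stabilization penalty from McMahan et al., 2013, while l2_shrinkage_regularization_strength adds a plain L2 penalty to the loss itself.

import tensorflow as tf

# Toy loss; in practice this would be the training loss of a (sparse) linear model.
w = tf.Variable([0.0, 0.0], name="w")
loss = tf.reduce_sum(tf.square(w - [1.0, -2.0]))

opt = tf.train.FtrlOptimizer(
    learning_rate=0.05,
    learning_rate_power=-0.5,  # negative values decay the rate; 0.0 keeps it fixed (section 3.1)
    l1_regularization_strength=0.001,            # induces sparsity
    l2_regularization_strength=0.001,            # online L2 penalty (McMahan et al., 2013)
    l2_shrinkage_regularization_strength=0.001)  # shrinkage-type L2 added to the loss
train_op = opt.minimize(loss)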

View File

@@ -79,7 +79,7 @@ def assign_moving_average(variable, value, decay, zero_debias=True, name=None):
A tensor which if evaluated will compute and return the new moving average.
References:
- A Method for Stochastic Optimization:
+ Adam - A Method for Stochastic Optimization:
[Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
([pdf](https://arxiv.org/pdf/1412.6980.pdf))
"""
@@ -207,7 +207,7 @@ def _zero_debias(unbiased_var, value, decay):
tensor will also update the shadow variables appropriately.
References:
- A Method for Stochastic Optimization:
+ Adam - A Method for Stochastic Optimization:
[Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
([pdf](https://arxiv.org/pdf/1412.6980.pdf))
"""