Merge pull request #23363 from Ouwen:ouwen/weight_decay_note
PiperOrigin-RevId: 224074364
commit 4efd674dab
@@ -59,6 +59,23 @@ class DecoupledWeightDecayExtension(object):
   Note that this extension decays weights BEFORE applying the update based
   on the gradient, i.e. this extension only has the desired behaviour for
   optimizers which do not depend on the value of 'var' in the update step!
+
+  Note: when applying a decay to the learning rate, be sure to manually apply
+  the decay to the `weight_decay` as well. For example:
+
+  ```python
+  schedule = tf.train.piecewise_constant(tf.train.get_global_step(),
+                                         [10000, 15000], [1e-0, 1e-1, 1e-2])
+  lr = 1e-1 * schedule()
+  wd = lambda: 1e-4 * schedule()
+
+  # ...
+
+  optimizer = tf.contrib.opt.MomentumWOptimizer(learning_rate=lr,
+                                                weight_decay=wd,
+                                                momentum=0.9,
+                                                use_nesterov=True)
+  ```
   """
 
   def __init__(self, weight_decay, **kwargs):
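For context on the ordering the added note describes, below is a minimal, hypothetical sketch of a decoupled weight-decay step; it is not the TensorFlow implementation, and the function name `decoupled_sgd_step`, the use of plain SGD, and the unscaled decay term are assumptions for illustration. The point is only that the decay is applied to the variable before the gradient-based update, which is why the behaviour is only as intended for optimizers whose update rule does not read the current value of `var`.

```python
import numpy as np

def decoupled_sgd_step(var, grad, lr, weight_decay):
    # Hypothetical illustration, not the TF op: decay the variable first...
    var = var - weight_decay * var
    # ...then apply the ordinary gradient-based update. An optimizer whose
    # update itself depends on `var` would now see the already-decayed value,
    # which is not the behaviour this extension is designed for.
    var = var - lr * grad
    return var

w = np.array([1.0, -2.0, 0.5])
g = np.array([0.1, 0.0, -0.2])
print(decoupled_sgd_step(w, g, lr=0.1, weight_decay=1e-4))
```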