Merge pull request #23363 from Ouwen:ouwen/weight_decay_note
PiperOrigin-RevId: 224074364
commit 4efd674dab
@@ -59,6 +59,23 @@ class DecoupledWeightDecayExtension(object):
   Note that this extension decays weights BEFORE applying the update based
   on the gradient, i.e. this extension only has the desired behaviour for
   optimizers which do not depend on the value of 'var' in the update step!
+
+  Note: when applying a decay to the learning rate, be sure to manually apply
+  the decay to the `weight_decay` as well. For example:
+
+  ```python
+  schedule = tf.train.piecewise_constant(tf.train.get_global_step(),
+                                         [10000, 15000], [1e-0, 1e-1, 1e-2])
+  lr = 1e-1 * schedule()
+  wd = lambda: 1e-4 * schedule()
+
+  # ...
+
+  optimizer = tf.contrib.opt.MomentumWOptimizer(learning_rate=lr,
+                                                weight_decay=wd,
+                                                momentum=0.9,
+                                                use_nesterov=True)
+  ```
   """

   def __init__(self, weight_decay, **kwargs):
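The first note above is about ordering: the decay is taken from the variable's value before the gradient-based update is applied. A minimal NumPy sketch of that ordering, purely for illustration (the function name and the numbers are made up; this is not the TensorFlow implementation):

```python
import numpy as np

def sgdw_step(var, grad, lr=0.1, weight_decay=1e-4):
    """One conceptual SGD-with-decoupled-weight-decay step (illustration only).

    The decay uses the pre-update value of `var` and is applied BEFORE the
    gradient step, which is why the extension only behaves as intended for
    inner optimizers whose update rule does not itself read `var`.
    """
    var = var - weight_decay * var  # decoupled decay, from the pre-update value
    var = var - lr * grad           # ordinary gradient step follows
    return var

# Toy usage with made-up numbers.
w = np.array([1.0, -2.0])
g = np.array([0.5, 0.5])
print(sgdw_step(w, g))  # [ 0.9499 -2.0498]
```

Because the plain gradient step here never reads `var`, applying the decay before or after it gives the same result; an inner update rule that does depend on the current value of `var` would see an already-decayed value, which is the caveat the docstring raises.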