Added note on weight decay for tf.contrib.opt optimizers.

Ouwen Huang 2018-10-30 05:37:22 +00:00
parent 10f259865a
commit 59617ccaca


@@ -59,6 +59,23 @@ class DecoupledWeightDecayExtension(object):
Note that this extension decays weights BEFORE applying the update based
on the gradient, i.e. this extension only has the desired behaviour for
optimizers which do not depend on the value of `var` in the update step!
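For illustration, a minimal sketch of that update order for plain SGD
(a hypothetical helper, not the library implementation):
```python
import numpy as np

def decoupled_sgd_step(var, grad, lr, weight_decay):
  # Illustrative only: decay the variable first, independently of the
  # gradient ...
  var = var - weight_decay * var
  # ... then apply the plain SGD update. The update itself never reads
  # the value of `var`, which is why SGD satisfies the condition above.
  return var - lr * grad

var = np.ones(3)
grad = np.full(3, 0.5)
var = decoupled_sgd_step(var, grad, lr=0.1, weight_decay=1e-4)
```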
Note: when applying a decay to the learning rate, be sure to manually apply
the decay to the `weight_decay` as well. For example:
```python
decay = tf.train.piecewise_constant(tf.train.get_global_step(),
                                    [10000, 15000], [1e-1, 1e-2, 1e-3])
lr = 1 * decay
wd = 1e-4 * decay

# ...

optimizer = tf.contrib.opt.MomentumWOptimizer(learning_rate=lr,
                                              weight_decay=wd,
                                              momentum=0.9,
                                              use_nesterov=True)
```
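Scaling `weight_decay` by the same schedule keeps the ratio between the
decay step and the gradient step constant over training; because the decay
is applied directly to the variables rather than added to the loss, it does
not shrink automatically when the learning rate decays.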
"""
def __init__(self, weight_decay, **kwargs):