From 0f7df20b2ca2eae753437503b7fe80bf09315ad3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Apr 2017 04:46:01 -0800
Subject: [PATCH] Allow callers to control the names of the slot variables.

Other optimizers which have only a single slot variable allow control
through the 'name' constructor parameter, but the FtrlOptimizer has two
variables. Because they both are created with the same 'name' parameter,
one of them has name as a suffix, and the other has name + "_1" as a
suffix. This change allows them to be specified in a more controllable
way.
Change: 152802478
---
 tensorflow/python/training/ftrl.py              | 63 ++++++++++++-------
 .../tensorflow.train.-ftrl-optimizer.pbtxt      |  2 +-
 2 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/tensorflow/python/training/ftrl.py b/tensorflow/python/training/ftrl.py
index f5b89596120..618f3baf089 100644
--- a/tensorflow/python/training/ftrl.py
+++ b/tensorflow/python/training/ftrl.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Ftrl-proximal for TensorFlow."""
 from __future__ import absolute_import
 from __future__ import division
@@ -32,12 +31,16 @@ class FtrlOptimizer(optimizer.Optimizer):
   https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf).
   """
 
-  def __init__(self, learning_rate,
+  def __init__(self,
+               learning_rate,
                learning_rate_power=-0.5,
                initial_accumulator_value=0.1,
                l1_regularization_strength=0.0,
                l2_regularization_strength=0.0,
-               use_locking=False, name="Ftrl"):
+               use_locking=False,
+               name="Ftrl",
+               accum_name=None,
+               linear_name=None):
     """Construct a new FTRL optimizer.
 
     Args:
@@ -52,6 +55,10 @@ class FtrlOptimizer(optimizer.Optimizer):
       use_locking: If `True` use locks for update operations.
       name: Optional name prefix for the operations created when applying
         gradients. Defaults to "Ftrl".
+      accum_name: The suffix for the variable that keeps the gradient squared
+        accumulator. If not present, defaults to name.
+      linear_name: The suffix for the variable that keeps the linear gradient
+        accumulator. If not present, defaults to name + "_1".
 
     Raises:
       ValueError: If one of the arguments is invalid.
@@ -82,35 +89,36 @@ class FtrlOptimizer(optimizer.Optimizer):
     self._learning_rate_power_tensor = None
     self._l1_regularization_strength_tensor = None
     self._l2_regularization_strength_tensor = None
+    self._accum_name = accum_name
+    self._linear_name = linear_name
 
   def _create_slots(self, var_list):
     # Create the "accum" and "linear" slots.
     for v in var_list:
       with ops.colocate_with(v):
-        val = constant_op.constant(self._initial_accumulator_value,
-                                   dtype=v.dtype, shape=v.get_shape())
-        self._get_or_make_slot(v, val, "accum", self._name)
-        self._zeros_slot(v, "linear", self._name)
+        val = constant_op.constant(
+            self._initial_accumulator_value, dtype=v.dtype, shape=v.get_shape())
+        self._get_or_make_slot(v, val, "accum", self._accum_name or self._name)
+        self._zeros_slot(v, "linear", self._linear_name or self._name)
 
   def _prepare(self):
     self._learning_rate_tensor = ops.convert_to_tensor(
-        self._learning_rate,
-        name="learning_rate")
+        self._learning_rate, name="learning_rate")
     self._l1_regularization_strength_tensor = ops.convert_to_tensor(
-        self._l1_regularization_strength,
-        name="l1_regularization_strength")
+        self._l1_regularization_strength, name="l1_regularization_strength")
     self._l2_regularization_strength_tensor = ops.convert_to_tensor(
-        self._l2_regularization_strength,
-        name="l2_regularization_strength")
+        self._l2_regularization_strength, name="l2_regularization_strength")
     self._learning_rate_power_tensor = ops.convert_to_tensor(
-        self._learning_rate_power,
-        name="learning_rate_power")
+        self._learning_rate_power, name="learning_rate_power")
 
   def _apply_dense(self, grad, var):
     accum = self.get_slot(var, "accum")
     linear = self.get_slot(var, "linear")
     return training_ops.apply_ftrl(
-        var, accum, linear, grad,
+        var,
+        accum,
+        linear,
+        grad,
         math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
         math_ops.cast(self._l1_regularization_strength_tensor,
                       var.dtype.base_dtype),
@@ -123,7 +131,10 @@ class FtrlOptimizer(optimizer.Optimizer):
     accum = self.get_slot(var, "accum")
     linear = self.get_slot(var, "linear")
     return training_ops.resource_apply_ftrl(
-        var.handle, accum.handle, linear.handle, grad,
+        var.handle,
+        accum.handle,
+        linear.handle,
+        grad,
         math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
         math_ops.cast(self._l1_regularization_strength_tensor,
                       grad.dtype.base_dtype),
@@ -136,7 +147,11 @@ class FtrlOptimizer(optimizer.Optimizer):
     accum = self.get_slot(var, "accum")
     linear = self.get_slot(var, "linear")
     return training_ops.sparse_apply_ftrl(
-        var, accum, linear, grad.values, grad.indices,
+        var,
+        accum,
+        linear,
+        grad.values,
+        grad.indices,
         math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
         math_ops.cast(self._l1_regularization_strength_tensor,
                       var.dtype.base_dtype),
@@ -149,11 +164,13 @@ class FtrlOptimizer(optimizer.Optimizer):
     accum = self.get_slot(var, "accum")
     linear = self.get_slot(var, "linear")
     return training_ops.resource_sparse_apply_ftrl(
-        var.handle, accum.handle, linear.handle, grad, indices,
+        var.handle,
+        accum.handle,
+        linear.handle,
+        grad,
+        indices,
         math_ops.cast(self._learning_rate_tensor, grad.dtype),
-        math_ops.cast(self._l1_regularization_strength_tensor,
-                      grad.dtype),
-        math_ops.cast(self._l2_regularization_strength_tensor,
-                      grad.dtype),
+        math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
+        math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
         math_ops.cast(self._learning_rate_power_tensor, grad.dtype),
         use_locking=self._use_locking)
diff --git a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt
index 0252474a1d5..2dc11df57b6 100644
--- a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'False\', \'Ftrl\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'use_locking\', \'name\', \'accum_name\', \'linear_name\'], varargs=None, keywords=None, defaults=[\'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'False\', \'Ftrl\', \'None\', \'None\'], "
   }
   member_method {
     name: "apply_gradients"
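
Note (not part of the patch): a minimal usage sketch of the new constructor
arguments, assuming the graph-mode tf.train API of this era. The variable name
"weights" and the suffixes "ftrl_accum" / "ftrl_linear" are illustrative
choices, not values required by the change; only the keyword arguments
accum_name and linear_name come from the patch.

    # Sketch: give the two FTRL slot variables explicit name suffixes instead
    # of relying on the implicit "Ftrl" / "Ftrl_1" suffixes.
    import tensorflow as tf

    weights = tf.Variable([0.0, 0.0], name="weights")
    loss = tf.reduce_sum(tf.square(weights - 1.0))

    opt = tf.train.FtrlOptimizer(
        learning_rate=0.1,
        accum_name="ftrl_accum",    # suffix for the squared-gradient accumulator
        linear_name="ftrl_linear")  # suffix for the linear accumulator
    train_op = opt.minimize(loss)   # slots are created here

    # Slots are still looked up by the fixed slot names "accum" and "linear";
    # accum_name / linear_name only affect the created variables' names.
    accum = opt.get_slot(weights, "accum")
    linear = opt.get_slot(weights, "linear")
    print(accum.name, linear.name)  # e.g. "weights/ftrl_accum:0" "weights/ftrl_linear:0"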