Create SdcaOptimizerV2 op to fix the "adaptative" typo.

PiperOrigin-RevId: 216370193
This commit is contained in:
Yuefeng Zhou 2018-10-09 09:32:50 -07:00 committed by TensorFlower Gardener
parent 87d8055c74
commit 3e1a0792fb
5 changed files with 246 additions and 19 deletions

View File

@ -22,6 +22,7 @@ import collections
from six.moves import range
from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable
from tensorflow.python.compat import compat
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@ -485,24 +486,44 @@ class SdcaModel(object):
sparse_weights.append(batch_gathered_weights)
# pylint: disable=protected-access
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
sparse_example_indices,
sparse_feature_indices,
sparse_features_values,
self._convert_n_to_tensor(self._examples['dense_features']),
internal_convert_to_tensor(self._examples['example_weights']),
internal_convert_to_tensor(self._examples['example_labels']),
sparse_indices,
sparse_weights,
self._convert_n_to_tensor(self._slots[
'unshrinked_dense_features_weights']),
example_state_data,
loss_type=self._options['loss_type'],
l1=self._options['symmetric_l1_regularization'],
l2=self._symmetric_l2_regularization(),
num_loss_partitions=self._num_loss_partitions(),
num_inner_iterations=1,
adaptative=self._adaptive())
if compat.forward_compatible(year=2018, month=10, day=30):
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
sparse_example_indices,
sparse_feature_indices,
sparse_features_values,
self._convert_n_to_tensor(self._examples['dense_features']),
internal_convert_to_tensor(self._examples['example_weights']),
internal_convert_to_tensor(self._examples['example_labels']),
sparse_indices,
sparse_weights,
self._convert_n_to_tensor(self._slots[
'unshrinked_dense_features_weights']),
example_state_data,
loss_type=self._options['loss_type'],
l1=self._options['symmetric_l1_regularization'],
l2=self._symmetric_l2_regularization(),
num_loss_partitions=self._num_loss_partitions(),
num_inner_iterations=1,
adaptive=self._adaptive())
else:
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
sparse_example_indices,
sparse_feature_indices,
sparse_features_values,
self._convert_n_to_tensor(self._examples['dense_features']),
internal_convert_to_tensor(self._examples['example_weights']),
internal_convert_to_tensor(self._examples['example_labels']),
sparse_indices,
sparse_weights,
self._convert_n_to_tensor(self._slots[
'unshrinked_dense_features_weights']),
example_state_data,
loss_type=self._options['loss_type'],
l1=self._options['symmetric_l1_regularization'],
l2=self._symmetric_l2_regularization(),
num_loss_partitions=self._num_loss_partitions(),
num_inner_iterations=1,
adaptative=self._adaptive())
# pylint: enable=protected-access
with ops.control_dependencies([esu]):

View File

@ -0,0 +1,171 @@
op {
graph_op_name: "SdcaOptimizerV2"
visibility: HIDDEN
in_arg {
name: "sparse_example_indices"
description: <<END
a list of vectors which contain example indices.
END
}
in_arg {
name: "sparse_feature_indices"
description: <<END
a list of vectors which contain feature indices.
END
}
in_arg {
name: "sparse_feature_values"
description: <<END
a list of vectors which contain the feature values
associated with each feature group.
END
}
in_arg {
name: "dense_features"
description: <<END
a list of matrices which contains the dense feature values.
END
}
in_arg {
name: "example_weights"
description: <<END
a vector which contains the weight associated with each
example.
END
}
in_arg {
name: "example_labels"
description: <<END
a vector which contains the label/target associated with each
example.
END
}
in_arg {
name: "sparse_indices"
description: <<END
a list of vectors where each value is the index which has a
corresponding weight in sparse_weights. This field may be omitted for the
dense approach.
END
}
in_arg {
name: "sparse_weights"
description: <<END
a list of vectors where each value is the weight associated with
a sparse feature group.
END
}
in_arg {
name: "dense_weights"
description: <<END
a list of vectors where the values are the weights associated
with a dense feature group.
END
}
in_arg {
name: "example_state_data"
description: <<END
a list of vectors containing the example state data.
END
}
out_arg {
name: "out_example_state_data"
description: <<END
a list of vectors containing the updated example state
data.
END
}
out_arg {
name: "out_delta_sparse_weights"
description: <<END
a list of vectors where each value is the delta
weights associated with a sparse feature group.
END
}
out_arg {
name: "out_delta_dense_weights"
description: <<END
a list of vectors where the values are the delta
weights associated with a dense feature group.
END
}
attr {
name: "loss_type"
description: <<END
Type of the primal loss. Currently SdcaSolver supports logistic,
squared and hinge losses.
END
}
attr {
name: "adaptive"
default_value {
b: True
}
description: <<END
Whether to use Adaptive SDCA for the inner loop.
END
}
attr {
name: "num_sparse_features"
description: <<END
Number of sparse feature groups to train on.
END
}
attr {
name: "num_sparse_features_with_values"
description: <<END
Number of sparse feature groups with values
associated with it, otherwise implicitly treats values as 1.0.
END
}
attr {
name: "num_dense_features"
description: <<END
Number of dense feature groups to train on.
END
}
attr {
name: "l1"
description: <<END
Symmetric l1 regularization strength.
END
}
attr {
name: "l2"
description: <<END
Symmetric l2 regularization strength.
END
}
attr {
name: "num_loss_partitions"
description: <<END
Number of partitions of the global loss function.
END
}
attr {
name: "num_inner_iterations"
description: <<END
Number of iterations per mini-batch.
END
}
summary: "Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for"
description: <<END
linear models with L1 + L2 regularization. As global optimization objective is
strongly-convex, the optimizer optimizes the dual objective at each step. The
optimizer applies each update one example at a time. Examples are sampled
uniformly, and the optimizer is learning rate free and enjoys linear convergence
rate.
[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
Shai Shalev-Shwartz, Tong Zhang. 2012
$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
Peter Richtarik, Martin Takac. 2015
[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
END
}

View File

@ -83,7 +83,11 @@ struct ComputeOptions {
context, false,
errors::InvalidArgument("Unsupported loss type: ", loss_type));
}
OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive));
auto s = context->GetAttr("adaptative", &adaptive);
if (!s.ok()) {
s = context->GetAttr("adaptive", &adaptive);
}
OP_REQUIRES_OK(context, s);
OP_REQUIRES_OK(
context, context->GetAttr("num_sparse_features", &num_sparse_features));
OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values",
@ -245,6 +249,8 @@ class SdcaOptimizer : public OpKernel {
};
REGISTER_KERNEL_BUILDER(Name("SdcaOptimizer").Device(DEVICE_CPU),
                        SdcaOptimizer);
// SdcaOptimizerV2 reuses the same kernel implementation: the kernel's
// constructor falls back from the misspelled "adaptative" attr to the
// corrected "adaptive" attr, so one kernel serves both op registrations.
REGISTER_KERNEL_BUILDER(Name("SdcaOptimizerV2").Device(DEVICE_CPU),
                        SdcaOptimizer);
class SdcaShrinkL1 : public OpKernel {
public:

View File

@ -65,6 +65,34 @@ REGISTER_OP("SdcaOptimizer")
.Output("out_delta_dense_weights: num_dense_features * float")
.SetShapeFn(ApplySdcaOptimizerShapeFn);
// SdcaOptimizerV2 is identical to SdcaOptimizer except that its boolean
// attr uses the corrected spelling "adaptive" instead of the "adaptative"
// typo registered in v1.
REGISTER_OP("SdcaOptimizerV2")
    .Attr(
        "loss_type: {'logistic_loss', 'squared_loss', 'hinge_loss',"
        "'smooth_hinge_loss', 'poisson_loss'}")
    // Corrected spelling of v1's "adaptative" attr. NOTE(review): the
    // registered default is false while the api_def pbtxt declares b: true —
    // presumably an intentional api_def default override; confirm.
    .Attr("adaptive : bool=false")
    // Per-feature-group counts; each one fixes the arity of the matching
    // "N * type" inputs/outputs below.
    .Attr("num_sparse_features: int >= 0")
    .Attr("num_sparse_features_with_values: int >= 0")
    .Attr("num_dense_features: int >= 0")
    // Symmetric L1/L2 regularization strengths.
    .Attr("l1: float")
    .Attr("l2: float")
    .Attr("num_loss_partitions: int >= 1")
    .Attr("num_inner_iterations: int >= 1")
    .Input("sparse_example_indices: num_sparse_features * int64")
    .Input("sparse_feature_indices: num_sparse_features * int64")
    .Input("sparse_feature_values: num_sparse_features_with_values * float")
    .Input("dense_features: num_dense_features * float")
    .Input("example_weights: float")
    .Input("example_labels: float")
    .Input("sparse_indices: num_sparse_features * int64")
    .Input("sparse_weights: num_sparse_features * float")
    .Input("dense_weights: num_dense_features * float")
    .Input("example_state_data: float")
    .Output("out_example_state_data: float")
    .Output("out_delta_sparse_weights: num_sparse_features * float")
    .Output("out_delta_dense_weights: num_dense_features * float")
    // Same signature as v1, so the v1 shape function applies unchanged.
    .SetShapeFn(ApplySdcaOptimizerShapeFn);
REGISTER_OP("SdcaShrinkL1")
.Attr("num_features: int >= 0")
.Attr("l1: float")

View File

@ -29,4 +29,5 @@ from tensorflow.python.ops.gen_sdca_ops import *
# No gradient is defined for the SDCA ops; mark them non-differentiable so
# gradient construction fails loudly instead of silently producing None.
ops.NotDifferentiable("SdcaFprint")
ops.NotDifferentiable("SdcaOptimizer")
# The V2 op (with the corrected "adaptive" attr spelling) is likewise
# non-differentiable.
ops.NotDifferentiable("SdcaOptimizerV2")
ops.NotDifferentiable("SdcaShrinkL1")