Create SdcaOptimizerV2 op to fix the "adaptative" typo.

PiperOrigin-RevId: 216370193
This commit is contained in:
Yuefeng Zhou 2018-10-09 09:32:50 -07:00 committed by TensorFlower Gardener
parent 87d8055c74
commit 3e1a0792fb
5 changed files with 246 additions and 19 deletions

View File

@ -22,6 +22,7 @@ import collections
from six.moves import range
from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable
from tensorflow.python.compat import compat
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
@ -485,24 +486,44 @@ class SdcaModel(object):
sparse_weights.append(batch_gathered_weights)
# pylint: disable=protected-access
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
sparse_example_indices,
sparse_feature_indices,
sparse_features_values,
self._convert_n_to_tensor(self._examples['dense_features']),
internal_convert_to_tensor(self._examples['example_weights']),
internal_convert_to_tensor(self._examples['example_labels']),
sparse_indices,
sparse_weights,
self._convert_n_to_tensor(self._slots[
'unshrinked_dense_features_weights']),
example_state_data,
loss_type=self._options['loss_type'],
l1=self._options['symmetric_l1_regularization'],
l2=self._symmetric_l2_regularization(),
num_loss_partitions=self._num_loss_partitions(),
num_inner_iterations=1,
adaptative=self._adaptive())
if compat.forward_compatible(year=2018, month=10, day=30):
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
sparse_example_indices,
sparse_feature_indices,
sparse_features_values,
self._convert_n_to_tensor(self._examples['dense_features']),
internal_convert_to_tensor(self._examples['example_weights']),
internal_convert_to_tensor(self._examples['example_labels']),
sparse_indices,
sparse_weights,
self._convert_n_to_tensor(self._slots[
'unshrinked_dense_features_weights']),
example_state_data,
loss_type=self._options['loss_type'],
l1=self._options['symmetric_l1_regularization'],
l2=self._symmetric_l2_regularization(),
num_loss_partitions=self._num_loss_partitions(),
num_inner_iterations=1,
adaptive=self._adaptive())
else:
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
sparse_example_indices,
sparse_feature_indices,
sparse_features_values,
self._convert_n_to_tensor(self._examples['dense_features']),
internal_convert_to_tensor(self._examples['example_weights']),
internal_convert_to_tensor(self._examples['example_labels']),
sparse_indices,
sparse_weights,
self._convert_n_to_tensor(self._slots[
'unshrinked_dense_features_weights']),
example_state_data,
loss_type=self._options['loss_type'],
l1=self._options['symmetric_l1_regularization'],
l2=self._symmetric_l2_regularization(),
num_loss_partitions=self._num_loss_partitions(),
num_inner_iterations=1,
adaptative=self._adaptive())
# pylint: enable=protected-access
with ops.control_dependencies([esu]):

View File

@ -0,0 +1,171 @@
op {
graph_op_name: "SdcaOptimizerV2"
visibility: HIDDEN
in_arg {
name: "sparse_example_indices"
description: <<END
a list of vectors which contain example indices.
END
}
in_arg {
name: "sparse_feature_indices"
description: <<END
a list of vectors which contain feature indices.
END
}
in_arg {
name: "sparse_feature_values"
description: <<END
a list of vectors which contain the feature values
associated with each feature group.
END
}
in_arg {
name: "dense_features"
description: <<END
a list of matrices which contains the dense feature values.
END
}
in_arg {
name: "example_weights"
description: <<END
a vector which contains the weight associated with each
example.
END
}
in_arg {
name: "example_labels"
description: <<END
a vector which contains the label/target associated with each
example.
END
}
in_arg {
name: "sparse_indices"
description: <<END
a list of vectors where each value is the index which has a
corresponding weight in sparse_weights. This field may be omitted for the
dense approach.
END
}
in_arg {
name: "sparse_weights"
description: <<END
a list of vectors where each value is the weight associated with
a sparse feature group.
END
}
in_arg {
name: "dense_weights"
description: <<END
a list of vectors where the values are the weights associated
with a dense feature group.
END
}
in_arg {
name: "example_state_data"
description: <<END
a list of vectors containing the example state data.
END
}
out_arg {
name: "out_example_state_data"
description: <<END
a list of vectors containing the updated example state
data.
END
}
out_arg {
name: "out_delta_sparse_weights"
description: <<END
a list of vectors where each value is the delta
weights associated with a sparse feature group.
END
}
out_arg {
name: "out_delta_dense_weights"
description: <<END
a list of vectors where the values are the delta
weights associated with a dense feature group.
END
}
attr {
name: "loss_type"
description: <<END
Type of the primal loss. Currently SdcaSolver supports logistic,
squared and hinge losses.
END
}
attr {
name: "adaptive"
default_value {
b: True
}
description: <<END
Whether to use Adaptive SDCA for the inner loop.
END
}
attr {
name: "num_sparse_features"
description: <<END
Number of sparse feature groups to train on.
END
}
attr {
name: "num_sparse_features_with_values"
description: <<END
Number of sparse feature groups with values
associated with it, otherwise implicitly treats values as 1.0.
END
}
attr {
name: "num_dense_features"
description: <<END
Number of dense feature groups to train on.
END
}
attr {
name: "l1"
description: <<END
Symmetric l1 regularization strength.
END
}
attr {
name: "l2"
description: <<END
Symmetric l2 regularization strength.
END
}
attr {
name: "num_loss_partitions"
description: <<END
Number of partitions of the global loss function.
END
}
attr {
name: "num_inner_iterations"
description: <<END
Number of iterations per mini-batch.
END
}
summary: "Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for"
description: <<END
linear models with L1 + L2 regularization. As global optimization objective is
strongly-convex, the optimizer optimizes the dual objective at each step. The
optimizer applies each update one example at a time. Examples are sampled
uniformly, and the optimizer is learning rate free and enjoys linear convergence
rate.
[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
Shai Shalev-Shwartz, Tong Zhang. 2012
$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
Peter Richtarik, Martin Takac. 2015
[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
END
}

View File

@ -83,7 +83,11 @@ struct ComputeOptions {
context, false,
errors::InvalidArgument("Unsupported loss type: ", loss_type));
}
OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive));
auto s = context->GetAttr("adaptative", &adaptive);
if (!s.ok()) {
s = context->GetAttr("adaptive", &adaptive);
}
OP_REQUIRES_OK(context, s);
OP_REQUIRES_OK(
context, context->GetAttr("num_sparse_features", &num_sparse_features));
OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values",
@ -245,6 +249,8 @@ class SdcaOptimizer : public OpKernel {
};
REGISTER_KERNEL_BUILDER(Name("SdcaOptimizer").Device(DEVICE_CPU),
                        SdcaOptimizer);
// SdcaOptimizerV2 reuses the same kernel implementation: the kernel's
// constructor falls back from the misspelled "adaptative" attr to the
// corrected "adaptive" attr, so one kernel serves both op registrations.
REGISTER_KERNEL_BUILDER(Name("SdcaOptimizerV2").Device(DEVICE_CPU),
                        SdcaOptimizer);
class SdcaShrinkL1 : public OpKernel {
public:

View File

@ -65,6 +65,34 @@ REGISTER_OP("SdcaOptimizer")
.Output("out_delta_dense_weights: num_dense_features * float")
.SetShapeFn(ApplySdcaOptimizerShapeFn);
// SdcaOptimizerV2 is identical to SdcaOptimizer except that its boolean
// attr uses the corrected spelling "adaptive" instead of the "adaptative"
// typo registered in v1.
REGISTER_OP("SdcaOptimizerV2")
    .Attr(
        "loss_type: {'logistic_loss', 'squared_loss', 'hinge_loss',"
        "'smooth_hinge_loss', 'poisson_loss'}")
    // Corrected spelling of v1's "adaptative" attr. NOTE(review): the
    // registered default is false while the api_def pbtxt declares b: true —
    // presumably an intentional api_def default override; confirm.
    .Attr("adaptive : bool=false")
    // Per-feature-group counts; each one fixes the arity of the matching
    // "N * type" inputs/outputs below.
    .Attr("num_sparse_features: int >= 0")
    .Attr("num_sparse_features_with_values: int >= 0")
    .Attr("num_dense_features: int >= 0")
    // Symmetric L1/L2 regularization strengths.
    .Attr("l1: float")
    .Attr("l2: float")
    .Attr("num_loss_partitions: int >= 1")
    .Attr("num_inner_iterations: int >= 1")
    .Input("sparse_example_indices: num_sparse_features * int64")
    .Input("sparse_feature_indices: num_sparse_features * int64")
    .Input("sparse_feature_values: num_sparse_features_with_values * float")
    .Input("dense_features: num_dense_features * float")
    .Input("example_weights: float")
    .Input("example_labels: float")
    .Input("sparse_indices: num_sparse_features * int64")
    .Input("sparse_weights: num_sparse_features * float")
    .Input("dense_weights: num_dense_features * float")
    .Input("example_state_data: float")
    .Output("out_example_state_data: float")
    .Output("out_delta_sparse_weights: num_sparse_features * float")
    .Output("out_delta_dense_weights: num_dense_features * float")
    // Same signature as v1, so the v1 shape function applies unchanged.
    .SetShapeFn(ApplySdcaOptimizerShapeFn);
REGISTER_OP("SdcaShrinkL1")
.Attr("num_features: int >= 0")
.Attr("l1: float")

View File

@ -29,4 +29,5 @@ from tensorflow.python.ops.gen_sdca_ops import *
# No gradient is defined for the SDCA ops; mark them non-differentiable so
# gradient construction fails loudly instead of silently producing None.
ops.NotDifferentiable("SdcaFprint")
ops.NotDifferentiable("SdcaOptimizer")
# The V2 op (with the corrected "adaptive" attr spelling) is likewise
# non-differentiable.
ops.NotDifferentiable("SdcaOptimizerV2")
ops.NotDifferentiable("SdcaShrinkL1")