Create SDCAOptimizerV2 op to fix the "adaptative" typo.
PiperOrigin-RevId: 216370193
This commit is contained in:
parent
87d8055c74
commit
3e1a0792fb
@ -22,6 +22,7 @@ import collections
|
||||
from six.moves import range
|
||||
|
||||
from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable
|
||||
from tensorflow.python.compat import compat
|
||||
from tensorflow.python.framework import constant_op
|
||||
from tensorflow.python.framework import dtypes
|
||||
from tensorflow.python.framework import ops
|
||||
@ -485,24 +486,44 @@ class SdcaModel(object):
|
||||
sparse_weights.append(batch_gathered_weights)
|
||||
|
||||
# pylint: disable=protected-access
|
||||
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
|
||||
sparse_example_indices,
|
||||
sparse_feature_indices,
|
||||
sparse_features_values,
|
||||
self._convert_n_to_tensor(self._examples['dense_features']),
|
||||
internal_convert_to_tensor(self._examples['example_weights']),
|
||||
internal_convert_to_tensor(self._examples['example_labels']),
|
||||
sparse_indices,
|
||||
sparse_weights,
|
||||
self._convert_n_to_tensor(self._slots[
|
||||
'unshrinked_dense_features_weights']),
|
||||
example_state_data,
|
||||
loss_type=self._options['loss_type'],
|
||||
l1=self._options['symmetric_l1_regularization'],
|
||||
l2=self._symmetric_l2_regularization(),
|
||||
num_loss_partitions=self._num_loss_partitions(),
|
||||
num_inner_iterations=1,
|
||||
adaptative=self._adaptive())
|
||||
if compat.forward_compatible(year=2018, month=10, day=30):
|
||||
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
|
||||
sparse_example_indices,
|
||||
sparse_feature_indices,
|
||||
sparse_features_values,
|
||||
self._convert_n_to_tensor(self._examples['dense_features']),
|
||||
internal_convert_to_tensor(self._examples['example_weights']),
|
||||
internal_convert_to_tensor(self._examples['example_labels']),
|
||||
sparse_indices,
|
||||
sparse_weights,
|
||||
self._convert_n_to_tensor(self._slots[
|
||||
'unshrinked_dense_features_weights']),
|
||||
example_state_data,
|
||||
loss_type=self._options['loss_type'],
|
||||
l1=self._options['symmetric_l1_regularization'],
|
||||
l2=self._symmetric_l2_regularization(),
|
||||
num_loss_partitions=self._num_loss_partitions(),
|
||||
num_inner_iterations=1,
|
||||
adaptive=self._adaptive())
|
||||
else:
|
||||
esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
|
||||
sparse_example_indices,
|
||||
sparse_feature_indices,
|
||||
sparse_features_values,
|
||||
self._convert_n_to_tensor(self._examples['dense_features']),
|
||||
internal_convert_to_tensor(self._examples['example_weights']),
|
||||
internal_convert_to_tensor(self._examples['example_labels']),
|
||||
sparse_indices,
|
||||
sparse_weights,
|
||||
self._convert_n_to_tensor(self._slots[
|
||||
'unshrinked_dense_features_weights']),
|
||||
example_state_data,
|
||||
loss_type=self._options['loss_type'],
|
||||
l1=self._options['symmetric_l1_regularization'],
|
||||
l2=self._symmetric_l2_regularization(),
|
||||
num_loss_partitions=self._num_loss_partitions(),
|
||||
num_inner_iterations=1,
|
||||
adaptative=self._adaptive())
|
||||
# pylint: enable=protected-access
|
||||
|
||||
with ops.control_dependencies([esu]):
|
||||
|
171
tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt
Normal file
171
tensorflow/core/api_def/base_api/api_def_SdcaOptimizerV2.pbtxt
Normal file
@ -0,0 +1,171 @@
|
||||
op {
|
||||
graph_op_name: "SdcaOptimizerV2"
|
||||
visibility: HIDDEN
|
||||
in_arg {
|
||||
name: "sparse_example_indices"
|
||||
description: <<END
|
||||
a list of vectors which contain example indices.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "sparse_feature_indices"
|
||||
description: <<END
|
||||
a list of vectors which contain feature indices.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "sparse_feature_values"
|
||||
description: <<END
|
||||
a list of vectors which contain the feature values
|
||||
associated with each feature group.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "dense_features"
|
||||
description: <<END
|
||||
a list of matrices which contain the dense feature values.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "example_weights"
|
||||
description: <<END
|
||||
a vector which contains the weight associated with each
|
||||
example.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "example_labels"
|
||||
description: <<END
|
||||
a vector which contains the label/target associated with each
|
||||
example.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "sparse_indices"
|
||||
description: <<END
|
||||
a list of vectors whose values are the indices which have
|
||||
corresponding weights in sparse_weights. This field may be omitted for the
|
||||
dense approach.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "sparse_weights"
|
||||
description: <<END
|
||||
a list of vectors where each value is the weight associated with
|
||||
a sparse feature group.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "dense_weights"
|
||||
description: <<END
|
||||
a list of vectors where the values are the weights associated
|
||||
with a dense feature group.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "example_state_data"
|
||||
description: <<END
|
||||
a list of vectors containing the example state data.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
name: "out_example_state_data"
|
||||
description: <<END
|
||||
a list of vectors containing the updated example state
|
||||
data.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
name: "out_delta_sparse_weights"
|
||||
description: <<END
|
||||
a list of vectors where each value is the delta
|
||||
weights associated with a sparse feature group.
|
||||
END
|
||||
}
|
||||
out_arg {
|
||||
name: "out_delta_dense_weights"
|
||||
description: <<END
|
||||
a list of vectors where the values are the delta
|
||||
weights associated with a dense feature group.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "loss_type"
|
||||
description: <<END
|
||||
Type of the primal loss. Currently SdcaSolver supports logistic,
|
||||
squared and hinge losses.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "adaptive"
|
||||
# NOTE(review): this api_def default is True, while the REGISTER_OP("SdcaOptimizerV2") hunk in this commit declares "adaptive : bool=false" -- confirm which default is intended.
default_value {
|
||||
b: True
|
||||
}
|
||||
description: <<END
|
||||
Whether to use Adaptive SDCA for the inner loop.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "num_sparse_features"
|
||||
description: <<END
|
||||
Number of sparse feature groups to train on.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "num_sparse_features_with_values"
|
||||
description: <<END
|
||||
Number of sparse feature groups with values
|
||||
associated with it, otherwise implicitly treats values as 1.0.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "num_dense_features"
|
||||
description: <<END
|
||||
Number of dense feature groups to train on.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "l1"
|
||||
description: <<END
|
||||
Symmetric l1 regularization strength.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "l2"
|
||||
description: <<END
|
||||
Symmetric l2 regularization strength.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "num_loss_partitions"
|
||||
description: <<END
|
||||
Number of partitions of the global loss function.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "num_inner_iterations"
|
||||
description: <<END
|
||||
Number of iterations per mini-batch.
|
||||
END
|
||||
}
|
||||
summary: "Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for"
|
||||
description: <<END
|
||||
linear models with L1 + L2 regularization. As global optimization objective is
|
||||
strongly-convex, the optimizer optimizes the dual objective at each step. The
|
||||
optimizer applies each update one example at a time. Examples are sampled
|
||||
uniformly, and the optimizer is learning rate free and enjoys linear convergence
|
||||
rate.
|
||||
|
||||
[Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
|
||||
Shai Shalev-Shwartz, Tong Zhang. 2012
|
||||
|
||||
$$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$
|
||||
|
||||
[Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
|
||||
Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
|
||||
Peter Richtarik, Martin Takac. 2015
|
||||
|
||||
[Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
|
||||
Dominik Csiba, Zheng Qu, Peter Richtarik. 2015
|
||||
END
|
||||
}
|
@ -83,7 +83,11 @@ struct ComputeOptions {
|
||||
context, false,
|
||||
errors::InvalidArgument("Unsupported loss type: ", loss_type));
|
||||
}
|
||||
OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive));
|
||||
auto s = context->GetAttr("adaptative", &adaptive);
|
||||
if (!s.ok()) {
|
||||
s = context->GetAttr("adaptive", &adaptive);
|
||||
}
|
||||
OP_REQUIRES_OK(context, s);
|
||||
OP_REQUIRES_OK(
|
||||
context, context->GetAttr("num_sparse_features", &num_sparse_features));
|
||||
OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values",
|
||||
@ -245,6 +249,8 @@ class SdcaOptimizer : public OpKernel {
|
||||
};
|
||||
REGISTER_KERNEL_BUILDER(Name("SdcaOptimizer").Device(DEVICE_CPU),
|
||||
SdcaOptimizer);
|
||||
REGISTER_KERNEL_BUILDER(Name("SdcaOptimizerV2").Device(DEVICE_CPU),
|
||||
SdcaOptimizer);
|
||||
|
||||
class SdcaShrinkL1 : public OpKernel {
|
||||
public:
|
||||
|
@ -65,6 +65,34 @@ REGISTER_OP("SdcaOptimizer")
|
||||
.Output("out_delta_dense_weights: num_dense_features * float")
|
||||
.SetShapeFn(ApplySdcaOptimizerShapeFn);
|
||||
|
||||
// The SdcaOptimizerV2 op fixes the "adaptative" typo in v1.
|
||||
REGISTER_OP("SdcaOptimizerV2")
|
||||
.Attr(
|
||||
"loss_type: {'logistic_loss', 'squared_loss', 'hinge_loss',"
|
||||
"'smooth_hinge_loss', 'poisson_loss'}")
|
||||
.Attr("adaptive : bool=false")
|
||||
.Attr("num_sparse_features: int >= 0")
|
||||
.Attr("num_sparse_features_with_values: int >= 0")
|
||||
.Attr("num_dense_features: int >= 0")
|
||||
.Attr("l1: float")
|
||||
.Attr("l2: float")
|
||||
.Attr("num_loss_partitions: int >= 1")
|
||||
.Attr("num_inner_iterations: int >= 1")
|
||||
.Input("sparse_example_indices: num_sparse_features * int64")
|
||||
.Input("sparse_feature_indices: num_sparse_features * int64")
|
||||
.Input("sparse_feature_values: num_sparse_features_with_values * float")
|
||||
.Input("dense_features: num_dense_features * float")
|
||||
.Input("example_weights: float")
|
||||
.Input("example_labels: float")
|
||||
.Input("sparse_indices: num_sparse_features * int64")
|
||||
.Input("sparse_weights: num_sparse_features * float")
|
||||
.Input("dense_weights: num_dense_features * float")
|
||||
.Input("example_state_data: float")
|
||||
.Output("out_example_state_data: float")
|
||||
.Output("out_delta_sparse_weights: num_sparse_features * float")
|
||||
.Output("out_delta_dense_weights: num_dense_features * float")
|
||||
.SetShapeFn(ApplySdcaOptimizerShapeFn);
|
||||
|
||||
REGISTER_OP("SdcaShrinkL1")
|
||||
.Attr("num_features: int >= 0")
|
||||
.Attr("l1: float")
|
||||
|
@ -29,4 +29,5 @@ from tensorflow.python.ops.gen_sdca_ops import *
|
||||
|
||||
ops.NotDifferentiable("SdcaFprint")
|
||||
ops.NotDifferentiable("SdcaOptimizer")
|
||||
ops.NotDifferentiable("SdcaOptimizerV2")
|
||||
ops.NotDifferentiable("SdcaShrinkL1")
|
||||
|
Loading…
Reference in New Issue
Block a user