Have sdca_model_fn used in SDCA-based optimizers use the training Ops from SDCAOptimizer instead of replicating this logic in place.
Change: 155326885
This commit is contained in:
parent
9d7e9b0e6b
commit
4dc3fdca61
@ -111,13 +111,11 @@ py_library(
|
|||||||
srcs_version = "PY2AND3",
|
srcs_version = "PY2AND3",
|
||||||
deps = [
|
deps = [
|
||||||
":sdca_ops_py",
|
":sdca_ops_py",
|
||||||
":sparse_feature_column_py",
|
|
||||||
"//tensorflow/contrib/framework:framework_py",
|
"//tensorflow/contrib/framework:framework_py",
|
||||||
"//tensorflow/contrib/layers:layers_py",
|
"//tensorflow/contrib/layers:layers_py",
|
||||||
"//tensorflow/contrib/learn",
|
"//tensorflow/contrib/learn",
|
||||||
"//tensorflow/python:array_ops",
|
"//tensorflow/python:array_ops",
|
||||||
"//tensorflow/python:dtypes",
|
"//tensorflow/python:dtypes",
|
||||||
"//tensorflow/python:math_ops",
|
|
||||||
"//tensorflow/python:sparse_tensor",
|
"//tensorflow/python:sparse_tensor",
|
||||||
"//tensorflow/python:tensor_util",
|
"//tensorflow/python:tensor_util",
|
||||||
"//tensorflow/python:training",
|
"//tensorflow/python:training",
|
||||||
|
@ -24,13 +24,10 @@ from tensorflow.contrib.learn.python.learn.estimators import estimator
|
|||||||
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
|
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
|
||||||
from tensorflow.contrib.learn.python.learn.estimators import prediction_key
|
from tensorflow.contrib.learn.python.learn.estimators import prediction_key
|
||||||
from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
|
from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
|
||||||
from tensorflow.contrib.linear_optimizer.python.ops import sdca_ops
|
|
||||||
from tensorflow.contrib.linear_optimizer.python.ops.sparse_feature_column import SparseFeatureColumn
|
|
||||||
from tensorflow.python.framework import dtypes
|
from tensorflow.python.framework import dtypes
|
||||||
from tensorflow.python.framework import sparse_tensor
|
from tensorflow.python.framework import sparse_tensor
|
||||||
from tensorflow.python.framework import tensor_util
|
from tensorflow.python.framework import tensor_util
|
||||||
from tensorflow.python.ops import array_ops
|
from tensorflow.python.ops import array_ops
|
||||||
from tensorflow.python.ops import math_ops
|
|
||||||
from tensorflow.python.ops import variable_scope
|
from tensorflow.python.ops import variable_scope
|
||||||
from tensorflow.python.training import session_run_hook
|
from tensorflow.python.training import session_run_hook
|
||||||
|
|
||||||
@ -76,131 +73,6 @@ def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
|
|||||||
columns_to_variables[bias_column] = [bias_variable]
|
columns_to_variables[bias_column] = [bias_variable]
|
||||||
|
|
||||||
|
|
||||||
def _get_sdca_train_step(optimizer, columns_to_variables, weight_column_name,
|
|
||||||
loss_type, features, targets, global_step):
|
|
||||||
"""Returns the training operation of an SdcaModel optimizer."""
|
|
||||||
|
|
||||||
def _dense_tensor_to_sparse_feature_column(dense_tensor):
|
|
||||||
"""Returns SparseFeatureColumn for the input dense_tensor."""
|
|
||||||
ignore_value = 0.0
|
|
||||||
sparse_indices = array_ops.where(
|
|
||||||
math_ops.not_equal(dense_tensor,
|
|
||||||
math_ops.cast(ignore_value, dense_tensor.dtype)))
|
|
||||||
sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
|
|
||||||
# TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Make this efficient, as SDCA now supports
|
|
||||||
# very sparse features with and without weights.
|
|
||||||
return SparseFeatureColumn(
|
|
||||||
array_ops.reshape(
|
|
||||||
array_ops.split(value=sparse_indices, num_or_size_splits=2,
|
|
||||||
axis=1)[0], [-1]),
|
|
||||||
array_ops.reshape(
|
|
||||||
array_ops.split(value=sparse_indices, num_or_size_splits=2,
|
|
||||||
axis=1)[1], [-1]),
|
|
||||||
array_ops.reshape(math_ops.to_float(sparse_values), [-1]))
|
|
||||||
|
|
||||||
def _training_examples_and_variables():
|
|
||||||
"""Returns dictionaries for training examples and variables."""
|
|
||||||
batch_size = targets.get_shape()[0]
|
|
||||||
|
|
||||||
# Iterate over all feature columns and create appropriate lists for dense
|
|
||||||
# and sparse features as well as dense and sparse weights (variables) for
|
|
||||||
# SDCA.
|
|
||||||
# TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
|
|
||||||
# dict as 1-dimensional tensors.
|
|
||||||
dense_features, sparse_features, sparse_feature_with_values = [], [], []
|
|
||||||
dense_feature_weights = []
|
|
||||||
sparse_feature_weights, sparse_feature_with_values_weights = [], []
|
|
||||||
for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
|
|
||||||
transformed_tensor = features[column]
|
|
||||||
if isinstance(column, layers.feature_column._RealValuedColumn): # pylint: disable=protected-access
|
|
||||||
# A real-valued column corresponds to a dense feature in SDCA. A
|
|
||||||
# transformed tensor corresponding to a RealValuedColumn has rank 2
|
|
||||||
# (its shape is typically [batch_size, column.dimension]) and so it
|
|
||||||
# can be passed to SDCA as is.
|
|
||||||
dense_features.append(transformed_tensor)
|
|
||||||
# For real valued columns, the variables list contains exactly one
|
|
||||||
# element.
|
|
||||||
dense_feature_weights.append(columns_to_variables[column][0])
|
|
||||||
elif isinstance(column, layers.feature_column._BucketizedColumn): # pylint: disable=protected-access
|
|
||||||
# A bucketized column corresponds to a sparse feature in SDCA. The
|
|
||||||
# bucketized feature is "sparsified" for SDCA by converting it to a
|
|
||||||
# SparseFeatureColumn representing the one-hot encoding of the
|
|
||||||
# bucketized feature.
|
|
||||||
#
|
|
||||||
# TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
|
|
||||||
# bucketized feature column to a dense feature in SDCA. This will likely
|
|
||||||
# depend on the number of buckets.
|
|
||||||
dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor) # pylint: disable=protected-access
|
|
||||||
sparse_feature_column = _dense_tensor_to_sparse_feature_column(
|
|
||||||
dense_bucket_tensor)
|
|
||||||
sparse_feature_with_values.append(sparse_feature_column)
|
|
||||||
# For bucketized columns, the variables list contains exactly one
|
|
||||||
# element.
|
|
||||||
sparse_feature_with_values_weights.append(
|
|
||||||
columns_to_variables[column][0])
|
|
||||||
elif isinstance(
|
|
||||||
column,
|
|
||||||
(
|
|
||||||
layers.feature_column._CrossedColumn, # pylint: disable=protected-access
|
|
||||||
layers.feature_column._SparseColumn)): # pylint: disable=protected-access
|
|
||||||
sparse_features.append(
|
|
||||||
SparseFeatureColumn(
|
|
||||||
array_ops.reshape(
|
|
||||||
array_ops.split(
|
|
||||||
value=transformed_tensor.indices,
|
|
||||||
num_or_size_splits=2,
|
|
||||||
axis=1)[0], [-1]),
|
|
||||||
array_ops.reshape(transformed_tensor.values, [-1]), None))
|
|
||||||
sparse_feature_weights.append(columns_to_variables[column][0])
|
|
||||||
elif isinstance(column, layers.feature_column._WeightedSparseColumn): # pylint: disable=protected-access
|
|
||||||
id_tensor = column.id_tensor(transformed_tensor)
|
|
||||||
weight_tensor = column.weight_tensor(transformed_tensor)
|
|
||||||
sparse_feature_with_values.append(
|
|
||||||
SparseFeatureColumn(
|
|
||||||
array_ops.reshape(
|
|
||||||
array_ops.split(
|
|
||||||
value=id_tensor.indices, num_or_size_splits=2, axis=1)[
|
|
||||||
0], [-1]),
|
|
||||||
array_ops.reshape(id_tensor.values, [-1]),
|
|
||||||
array_ops.reshape(weight_tensor.values, [-1])))
|
|
||||||
sparse_feature_with_values_weights.append(
|
|
||||||
columns_to_variables[column][0])
|
|
||||||
else:
|
|
||||||
raise ValueError("SDCAOptimizer does not support column type {}".format(
|
|
||||||
type(column).__name__))
|
|
||||||
|
|
||||||
example_weights = array_ops.reshape(
|
|
||||||
features[weight_column_name],
|
|
||||||
shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
|
|
||||||
example_ids = features[optimizer.example_id_column]
|
|
||||||
sparse_feature_with_values.extend(sparse_features)
|
|
||||||
sparse_feature_with_values_weights.extend(sparse_feature_weights)
|
|
||||||
examples = dict(
|
|
||||||
sparse_features=sparse_feature_with_values,
|
|
||||||
dense_features=dense_features,
|
|
||||||
example_labels=math_ops.to_float(
|
|
||||||
array_ops.reshape(targets, shape=[-1])),
|
|
||||||
example_weights=example_weights,
|
|
||||||
example_ids=example_ids)
|
|
||||||
sdca_variables = dict(
|
|
||||||
sparse_features_weights=sparse_feature_with_values_weights,
|
|
||||||
dense_features_weights=dense_feature_weights)
|
|
||||||
return examples, sdca_variables
|
|
||||||
|
|
||||||
training_examples, training_variables = _training_examples_and_variables()
|
|
||||||
sdca_model = sdca_ops.SdcaModel(
|
|
||||||
examples=training_examples,
|
|
||||||
variables=training_variables,
|
|
||||||
options=dict(
|
|
||||||
symmetric_l1_regularization=optimizer.symmetric_l1_regularization,
|
|
||||||
symmetric_l2_regularization=optimizer.symmetric_l2_regularization,
|
|
||||||
num_loss_partitions=optimizer.num_loss_partitions,
|
|
||||||
num_table_shards=optimizer.num_table_shards,
|
|
||||||
loss_type=loss_type))
|
|
||||||
train_op = sdca_model.minimize(global_step=global_step)
|
|
||||||
return sdca_model, train_op
|
|
||||||
|
|
||||||
|
|
||||||
def sdca_model_fn(features, labels, mode, params, config=None):
|
def sdca_model_fn(features, labels, mode, params, config=None):
|
||||||
"""A model_fn for linear models that use the SDCA optimizer.
|
"""A model_fn for linear models that use the SDCA optimizer.
|
||||||
|
|
||||||
@ -283,9 +155,9 @@ def sdca_model_fn(features, labels, mode, params, config=None):
|
|||||||
|
|
||||||
def _train_op_fn(unused_loss):
|
def _train_op_fn(unused_loss):
|
||||||
global_step = contrib_variables.get_global_step()
|
global_step = contrib_variables.get_global_step()
|
||||||
sdca_model, train_op = _get_sdca_train_step(optimizer, columns_to_variables,
|
sdca_model, train_op = optimizer.get_train_step(
|
||||||
weight_column_name, loss_type,
|
columns_to_variables, weight_column_name, loss_type, features, labels,
|
||||||
features, labels, global_step)
|
global_step)
|
||||||
if update_weights_hook is not None:
|
if update_weights_hook is not None:
|
||||||
update_weights_hook.set_parameters(sdca_model, train_op)
|
update_weights_hook.set_parameters(sdca_model, train_op)
|
||||||
return train_op
|
return train_op
|
||||||
|
@ -99,16 +99,16 @@ class SDCAOptimizer(object):
|
|||||||
def symmetric_l2_regularization(self):
|
def symmetric_l2_regularization(self):
|
||||||
return self._symmetric_l2_regularization
|
return self._symmetric_l2_regularization
|
||||||
|
|
||||||
def get_train_step(self, columns_to_variables,
|
def get_train_step(self, columns_to_variables, weight_column_name, loss_type,
|
||||||
weight_column_name, loss_type, features, targets,
|
features, targets, global_step):
|
||||||
global_step):
|
|
||||||
"""Returns the training operation of an SdcaModel optimizer."""
|
"""Returns the training operation of an SdcaModel optimizer."""
|
||||||
|
|
||||||
def _tensor_to_sparse_feature_column(dense_tensor):
|
def _dense_tensor_to_sparse_feature_column(dense_tensor):
|
||||||
"""Returns SparseFeatureColumn for the input dense_tensor."""
|
"""Returns SparseFeatureColumn for the input dense_tensor."""
|
||||||
ignore_value = 0.0
|
ignore_value = 0.0
|
||||||
sparse_indices = array_ops.where(math_ops.not_equal(
|
sparse_indices = array_ops.where(
|
||||||
dense_tensor, math_ops.cast(ignore_value, dense_tensor.dtype)))
|
math_ops.not_equal(dense_tensor,
|
||||||
|
math_ops.cast(ignore_value, dense_tensor.dtype)))
|
||||||
sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
|
sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
|
||||||
# TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Make this efficient, as SDCA now supports
|
# TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Make this efficient, as SDCA now supports
|
||||||
# very sparse features with and without weights.
|
# very sparse features with and without weights.
|
||||||
@ -133,10 +133,9 @@ class SDCAOptimizer(object):
|
|||||||
dense_features, sparse_features, sparse_feature_with_values = [], [], []
|
dense_features, sparse_features, sparse_feature_with_values = [], [], []
|
||||||
dense_feature_weights = []
|
dense_feature_weights = []
|
||||||
sparse_feature_weights, sparse_feature_with_values_weights = [], []
|
sparse_feature_weights, sparse_feature_with_values_weights = [], []
|
||||||
# pylint: disable=protected-access
|
|
||||||
for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
|
for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
|
||||||
transformed_tensor = features[column]
|
transformed_tensor = features[column]
|
||||||
if isinstance(column, layers.feature_column._RealValuedColumn):
|
if isinstance(column, layers.feature_column._RealValuedColumn): # pylint: disable=protected-access
|
||||||
# A real-valued column corresponds to a dense feature in SDCA. A
|
# A real-valued column corresponds to a dense feature in SDCA. A
|
||||||
# transformed tensor corresponding to a RealValuedColumn has rank 2
|
# transformed tensor corresponding to a RealValuedColumn has rank 2
|
||||||
# (its shape is typically [batch_size, column.dimension]) and so it
|
# (its shape is typically [batch_size, column.dimension]) and so it
|
||||||
@ -145,22 +144,28 @@ class SDCAOptimizer(object):
|
|||||||
# For real valued columns, the variables list contains exactly one
|
# For real valued columns, the variables list contains exactly one
|
||||||
# element.
|
# element.
|
||||||
dense_feature_weights.append(columns_to_variables[column][0])
|
dense_feature_weights.append(columns_to_variables[column][0])
|
||||||
elif isinstance(column, layers.feature_column._BucketizedColumn):
|
elif isinstance(column, layers.feature_column._BucketizedColumn): # pylint: disable=protected-access
|
||||||
# A bucketized column corresponds to a sparse feature in SDCA. The
|
# A bucketized column corresponds to a sparse feature in SDCA. The
|
||||||
# bucketized feature is "sparsified" for SDCA by converting it to a
|
# bucketized feature is "sparsified" for SDCA by converting it to a
|
||||||
# SparseFeatureColumn representing the one-hot encoding of the
|
# SparseFeatureColumn representing the one-hot encoding of the
|
||||||
# bucketized feature.
|
# bucketized feature.
|
||||||
dense_bucket_tensor = layers.input_from_feature_columns(
|
#
|
||||||
{column: transformed_tensor}, [column])
|
# TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
|
||||||
sparse_feature_column = _tensor_to_sparse_feature_column(
|
# bucketized feature column to a dense feature in SDCA. This will
|
||||||
|
# likely depend on the number of buckets.
|
||||||
|
dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor) # pylint: disable=protected-access
|
||||||
|
sparse_feature_column = _dense_tensor_to_sparse_feature_column(
|
||||||
dense_bucket_tensor)
|
dense_bucket_tensor)
|
||||||
sparse_feature_with_values.append(sparse_feature_column)
|
sparse_feature_with_values.append(sparse_feature_column)
|
||||||
# For bucketized columns, the variables list contains exactly one
|
# For bucketized columns, the variables list contains exactly one
|
||||||
# element.
|
# element.
|
||||||
sparse_feature_with_values_weights.append(
|
sparse_feature_with_values_weights.append(
|
||||||
columns_to_variables[column][0])
|
columns_to_variables[column][0])
|
||||||
elif isinstance(column, (layers.feature_column._CrossedColumn,
|
elif isinstance(
|
||||||
layers.feature_column._SparseColumn)):
|
column,
|
||||||
|
(
|
||||||
|
layers.feature_column._CrossedColumn, # pylint: disable=protected-access
|
||||||
|
layers.feature_column._SparseColumn)): # pylint: disable=protected-access
|
||||||
sparse_features.append(
|
sparse_features.append(
|
||||||
SparseFeatureColumn(
|
SparseFeatureColumn(
|
||||||
array_ops.reshape(
|
array_ops.reshape(
|
||||||
@ -168,10 +173,9 @@ class SDCAOptimizer(object):
|
|||||||
value=transformed_tensor.indices,
|
value=transformed_tensor.indices,
|
||||||
num_or_size_splits=2,
|
num_or_size_splits=2,
|
||||||
axis=1)[0], [-1]),
|
axis=1)[0], [-1]),
|
||||||
array_ops.reshape(transformed_tensor.values, [-1]),
|
array_ops.reshape(transformed_tensor.values, [-1]), None))
|
||||||
None))
|
|
||||||
sparse_feature_weights.append(columns_to_variables[column][0])
|
sparse_feature_weights.append(columns_to_variables[column][0])
|
||||||
elif isinstance(column, layers.feature_column._WeightedSparseColumn):
|
elif isinstance(column, layers.feature_column._WeightedSparseColumn): # pylint: disable=protected-access
|
||||||
id_tensor = column.id_tensor(transformed_tensor)
|
id_tensor = column.id_tensor(transformed_tensor)
|
||||||
weight_tensor = column.weight_tensor(transformed_tensor)
|
weight_tensor = column.weight_tensor(transformed_tensor)
|
||||||
sparse_feature_with_values.append(
|
sparse_feature_with_values.append(
|
||||||
@ -187,7 +191,6 @@ class SDCAOptimizer(object):
|
|||||||
else:
|
else:
|
||||||
raise ValueError('SDCAOptimizer does not support column type %s.' %
|
raise ValueError('SDCAOptimizer does not support column type %s.' %
|
||||||
type(column).__name__)
|
type(column).__name__)
|
||||||
# pylint: enable=protected-access
|
|
||||||
|
|
||||||
example_weights = array_ops.reshape(
|
example_weights = array_ops.reshape(
|
||||||
features[weight_column_name],
|
features[weight_column_name],
|
||||||
@ -195,10 +198,11 @@ class SDCAOptimizer(object):
|
|||||||
example_ids = features[self._example_id_column]
|
example_ids = features[self._example_id_column]
|
||||||
sparse_feature_with_values.extend(sparse_features)
|
sparse_feature_with_values.extend(sparse_features)
|
||||||
sparse_feature_with_values_weights.extend(sparse_feature_weights)
|
sparse_feature_with_values_weights.extend(sparse_feature_weights)
|
||||||
examples = dict(sparse_features=sparse_feature_with_values,
|
examples = dict(
|
||||||
|
sparse_features=sparse_feature_with_values,
|
||||||
dense_features=dense_features,
|
dense_features=dense_features,
|
||||||
example_labels=math_ops.to_float(array_ops.reshape(
|
example_labels=math_ops.to_float(
|
||||||
targets, shape=[-1])),
|
array_ops.reshape(targets, shape=[-1])),
|
||||||
example_weights=example_weights,
|
example_weights=example_weights,
|
||||||
example_ids=example_ids)
|
example_ids=example_ids)
|
||||||
sdca_variables = dict(
|
sdca_variables = dict(
|
||||||
|
Loading…
Reference in New Issue
Block a user