diff --git a/tensorflow/contrib/linear_optimizer/BUILD b/tensorflow/contrib/linear_optimizer/BUILD
index 21f02b0a967..472633b5c7c 100644
--- a/tensorflow/contrib/linear_optimizer/BUILD
+++ b/tensorflow/contrib/linear_optimizer/BUILD
@@ -111,13 +111,11 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":sdca_ops_py",
-        ":sparse_feature_column_py",
         "//tensorflow/contrib/framework:framework_py",
         "//tensorflow/contrib/layers:layers_py",
         "//tensorflow/contrib/learn",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
-        "//tensorflow/python:math_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_util",
         "//tensorflow/python:training",
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index 733b03eed36..f4961ab9dbf 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -24,13 +24,10 @@ from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
 from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
-from tensorflow.contrib.linear_optimizer.python.ops import sdca_ops
-from tensorflow.contrib.linear_optimizer.python.ops.sparse_feature_column import SparseFeatureColumn
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.training import session_run_hook
 
@@ -76,131 +73,6 @@ def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
   columns_to_variables[bias_column] = [bias_variable]
 
 
-def _get_sdca_train_step(optimizer, columns_to_variables, weight_column_name,
-                         loss_type, features, targets, global_step):
-  """Returns the training operation of an SdcaModel optimizer."""
-
-  def _dense_tensor_to_sparse_feature_column(dense_tensor):
-    """Returns SparseFeatureColumn for the input dense_tensor."""
-    ignore_value = 0.0
-    sparse_indices = array_ops.where(
-        math_ops.not_equal(dense_tensor,
-                           math_ops.cast(ignore_value, dense_tensor.dtype)))
-    sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
-    # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Makes this efficient, as now SDCA supports
-    # very sparse features with weights and not weights.
-    return SparseFeatureColumn(
-        array_ops.reshape(
-            array_ops.split(value=sparse_indices, num_or_size_splits=2,
-                            axis=1)[0], [-1]),
-        array_ops.reshape(
-            array_ops.split(value=sparse_indices, num_or_size_splits=2,
-                            axis=1)[1], [-1]),
-        array_ops.reshape(math_ops.to_float(sparse_values), [-1]))
-
-  def _training_examples_and_variables():
-    """Returns dictionaries for training examples and variables."""
-    batch_size = targets.get_shape()[0]
-
-    # Iterate over all feature columns and create appropriate lists for dense
-    # and sparse features as well as dense and sparse weights (variables) for
-    # SDCA.
-    # TODO(sibyl-vie3Poto): Reshape variables stored as values in column_to_variables
-    # dict as 1-dimensional tensors.
-    dense_features, sparse_features, sparse_feature_with_values = [], [], []
-    dense_feature_weights = []
-    sparse_feature_weights, sparse_feature_with_values_weights = [], []
-    for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
-      transformed_tensor = features[column]
-      if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
-        # A real-valued column corresponds to a dense feature in SDCA. A
-        # transformed tensor corresponding to a RealValuedColumn has rank 2
-        # (its shape is typically [batch_size, column.dimension]) and so it
-        # can be passed to SDCA as is.
-        dense_features.append(transformed_tensor)
-        # For real valued columns, the variables list contains exactly one
-        # element.
-        dense_feature_weights.append(columns_to_variables[column][0])
-      elif isinstance(column, layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
-        # A bucketized column corresponds to a sparse feature in SDCA. The
-        # bucketized feature is "sparsified" for SDCA by converting it to a
-        # SparseFeatureColumn respresenting the one-hot encoding of the
-        # bucketized feature.
-        #
-        # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
-        # bucketized feature column to a dense feature in SDCA. This will likely
-        # depend on the number of buckets.
-        dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor)  # pylint: disable=protected-access
-        sparse_feature_column = _dense_tensor_to_sparse_feature_column(
-            dense_bucket_tensor)
-        sparse_feature_with_values.append(sparse_feature_column)
-        # For bucketized columns, the variables list contains exactly one
-        # element.
-        sparse_feature_with_values_weights.append(
-            columns_to_variables[column][0])
-      elif isinstance(
-          column,
-          (
-              layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
-              layers.feature_column._SparseColumn)):  # pylint: disable=protected-access
-        sparse_features.append(
-            SparseFeatureColumn(
-                array_ops.reshape(
-                    array_ops.split(
-                        value=transformed_tensor.indices,
-                        num_or_size_splits=2,
-                        axis=1)[0], [-1]),
-                array_ops.reshape(transformed_tensor.values, [-1]), None))
-        sparse_feature_weights.append(columns_to_variables[column][0])
-      elif isinstance(column, layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
-        id_tensor = column.id_tensor(transformed_tensor)
-        weight_tensor = column.weight_tensor(transformed_tensor)
-        sparse_feature_with_values.append(
-            SparseFeatureColumn(
-                array_ops.reshape(
-                    array_ops.split(
-                        value=id_tensor.indices, num_or_size_splits=2, axis=1)[
-                            0], [-1]),
-                array_ops.reshape(id_tensor.values, [-1]),
-                array_ops.reshape(weight_tensor.values, [-1])))
-        sparse_feature_with_values_weights.append(
-            columns_to_variables[column][0])
-      else:
-        raise ValueError("SDCAOptimizer does not support column type {}".format(
-            type(column).__name__))
-
-    example_weights = array_ops.reshape(
-        features[weight_column_name],
-        shape=[-1]) if weight_column_name else array_ops.ones([batch_size])
-    example_ids = features[optimizer.example_id_column]
-    sparse_feature_with_values.extend(sparse_features)
-    sparse_feature_with_values_weights.extend(sparse_feature_weights)
-    examples = dict(
-        sparse_features=sparse_feature_with_values,
-        dense_features=dense_features,
-        example_labels=math_ops.to_float(
-            array_ops.reshape(targets, shape=[-1])),
-        example_weights=example_weights,
-        example_ids=example_ids)
-    sdca_variables = dict(
-        sparse_features_weights=sparse_feature_with_values_weights,
-        dense_features_weights=dense_feature_weights)
-    return examples, sdca_variables
-
-  training_examples, training_variables = _training_examples_and_variables()
-  sdca_model = sdca_ops.SdcaModel(
-      examples=training_examples,
-      variables=training_variables,
-      options=dict(
-          symmetric_l1_regularization=optimizer.symmetric_l1_regularization,
-          symmetric_l2_regularization=optimizer.symmetric_l2_regularization,
-          num_loss_partitions=optimizer.num_loss_partitions,
-          num_table_shards=optimizer.num_table_shards,
-          loss_type=loss_type))
-  train_op = sdca_model.minimize(global_step=global_step)
-  return sdca_model, train_op
-
-
 def sdca_model_fn(features, labels, mode, params, config=None):
   """A model_fn for linear models that use the SDCA optimizer.
 
@@ -283,9 +155,9 @@ def sdca_model_fn(features, labels, mode, params, config=None):
 
   def _train_op_fn(unused_loss):
     global_step = contrib_variables.get_global_step()
-    sdca_model, train_op = _get_sdca_train_step(optimizer, columns_to_variables,
-                                                weight_column_name, loss_type,
-                                                features, labels, global_step)
+    sdca_model, train_op = optimizer.get_train_step(
+        columns_to_variables, weight_column_name, loss_type, features, labels,
+        global_step)
     if update_weights_hook is not None:
       update_weights_hook.set_parameters(sdca_model, train_op)
     return train_op
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py
index f9d69d6dea9..65a7116c237 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py
@@ -99,16 +99,16 @@ class SDCAOptimizer(object):
   def symmetric_l2_regularization(self):
     return self._symmetric_l2_regularization
 
-  def get_train_step(self, columns_to_variables,
-                     weight_column_name, loss_type, features, targets,
-                     global_step):
+  def get_train_step(self, columns_to_variables, weight_column_name, loss_type,
+                     features, targets, global_step):
     """Returns the training operation of an SdcaModel optimizer."""
 
-    def _tensor_to_sparse_feature_column(dense_tensor):
+    def _dense_tensor_to_sparse_feature_column(dense_tensor):
       """Returns SparseFeatureColumn for the input dense_tensor."""
       ignore_value = 0.0
-      sparse_indices = array_ops.where(math_ops.not_equal(
-          dense_tensor, math_ops.cast(ignore_value, dense_tensor.dtype)))
+      sparse_indices = array_ops.where(
+          math_ops.not_equal(dense_tensor,
+                             math_ops.cast(ignore_value, dense_tensor.dtype)))
       sparse_values = array_ops.gather_nd(dense_tensor, sparse_indices)
       # TODO(sibyl-Aix6ihai, sibyl-vie3Poto): Makes this efficient, as now SDCA supports
       # very sparse features with weights and not weights.
@@ -133,10 +133,9 @@ class SDCAOptimizer(object):
       dense_features, sparse_features, sparse_feature_with_values = [], [], []
       dense_feature_weights = []
       sparse_feature_weights, sparse_feature_with_values_weights = [], []
-      # pylint: disable=protected-access
       for column in sorted(columns_to_variables.keys(), key=lambda x: x.key):
         transformed_tensor = features[column]
-        if isinstance(column, layers.feature_column._RealValuedColumn):
+        if isinstance(column, layers.feature_column._RealValuedColumn):  # pylint: disable=protected-access
           # A real-valued column corresponds to a dense feature in SDCA. A
           # transformed tensor corresponding to a RealValuedColumn has rank 2
           # (its shape is typically [batch_size, column.dimension]) and so it
@@ -145,22 +144,28 @@ class SDCAOptimizer(object):
           # For real valued columns, the variables list contains exactly one
           # element.
           dense_feature_weights.append(columns_to_variables[column][0])
-        elif isinstance(column, layers.feature_column._BucketizedColumn):
+        elif isinstance(column, layers.feature_column._BucketizedColumn):  # pylint: disable=protected-access
           # A bucketized column corresponds to a sparse feature in SDCA. The
           # bucketized feature is "sparsified" for SDCA by converting it to a
           # SparseFeatureColumn respresenting the one-hot encoding of the
           # bucketized feature.
-          dense_bucket_tensor = layers.input_from_feature_columns(
-              {column: transformed_tensor}, [column])
-          sparse_feature_column = _tensor_to_sparse_feature_column(
+          #
+          # TODO(sibyl-vie3Poto): Explore whether it is more efficient to translate a
+          # bucketized feature column to a dense feature in SDCA. This will
+          # likely depend on the number of buckets.
+          dense_bucket_tensor = column._to_dnn_input_layer(transformed_tensor)  # pylint: disable=protected-access
+          sparse_feature_column = _dense_tensor_to_sparse_feature_column(
               dense_bucket_tensor)
           sparse_feature_with_values.append(sparse_feature_column)
           # For bucketized columns, the variables list contains exactly one
           # element.
           sparse_feature_with_values_weights.append(
               columns_to_variables[column][0])
-        elif isinstance(column, (layers.feature_column._CrossedColumn,
-                                 layers.feature_column._SparseColumn)):
+        elif isinstance(
+            column,
+            (
+                layers.feature_column._CrossedColumn,  # pylint: disable=protected-access
+                layers.feature_column._SparseColumn)):  # pylint: disable=protected-access
           sparse_features.append(
               SparseFeatureColumn(
                   array_ops.reshape(
@@ -168,10 +173,9 @@ class SDCAOptimizer(object):
                           value=transformed_tensor.indices,
                           num_or_size_splits=2,
                           axis=1)[0], [-1]),
-                  array_ops.reshape(transformed_tensor.values, [-1]),
-                  None))
+                  array_ops.reshape(transformed_tensor.values, [-1]), None))
           sparse_feature_weights.append(columns_to_variables[column][0])
-        elif isinstance(column, layers.feature_column._WeightedSparseColumn):
+        elif isinstance(column, layers.feature_column._WeightedSparseColumn):  # pylint: disable=protected-access
           id_tensor = column.id_tensor(transformed_tensor)
           weight_tensor = column.weight_tensor(transformed_tensor)
           sparse_feature_with_values.append(
@@ -183,11 +187,10 @@ class SDCAOptimizer(object):
                   array_ops.reshape(id_tensor.values, [-1]),
                   array_ops.reshape(weight_tensor.values, [-1])))
           sparse_feature_with_values_weights.append(
-            columns_to_variables[column][0])
+              columns_to_variables[column][0])
         else:
           raise ValueError('SDCAOptimizer does not support column type %s.' %
                            type(column).__name__)
-      # pylint: enable=protected-access
 
       example_weights = array_ops.reshape(
           features[weight_column_name],
@@ -195,12 +198,13 @@ class SDCAOptimizer(object):
       example_ids = features[self._example_id_column]
       sparse_feature_with_values.extend(sparse_features)
       sparse_feature_with_values_weights.extend(sparse_feature_weights)
-      examples = dict(sparse_features=sparse_feature_with_values,
-                      dense_features=dense_features,
-                      example_labels=math_ops.to_float(array_ops.reshape(
-                          targets, shape=[-1])),
-                      example_weights=example_weights,
-                      example_ids=example_ids)
+      examples = dict(
+          sparse_features=sparse_feature_with_values,
+          dense_features=dense_features,
+          example_labels=math_ops.to_float(
+              array_ops.reshape(targets, shape=[-1])),
+          example_weights=example_weights,
+          example_ids=example_ids)
       sdca_variables = dict(
           sparse_features_weights=sparse_feature_with_values_weights,
           dense_features_weights=dense_feature_weights)