luke iwanski 2016-10-17 20:52:26 +01:00
commit 2050900830
293 changed files with 12030 additions and 4654 deletions

configure

@ -109,7 +109,7 @@ fi
## Find swig path
if [ -z "$SWIG_PATH" ]; then
SWIG_PATH=`type -p swig 2> /dev/null`
SWIG_PATH=`type -p swig 2> /dev/null || true`
fi
if [[ ! -e "$SWIG_PATH" ]]; then
echo "Can't find swig. Ensure swig is in \$PATH or set \$SWIG_PATH."


@ -28,7 +28,6 @@ cc_library(
deps = [
":constants",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:tensorflow",
@ -45,7 +44,9 @@ tf_cc_test(
deps = [
":constants",
":loader",
":signature_constants",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",


@ -18,6 +18,12 @@ limitations under the License.
namespace tensorflow {
// SavedModel assets directory.
constexpr char kSavedModelAssetsDirectory[] = "assets";
// SavedModel assets key for graph collection-def.
constexpr char kSavedModelAssetsKey[] = "saved_model_assets";
// SavedModel proto filename.
constexpr char kSavedModelFilenamePb[] = "saved_model.pb";


@ -16,6 +16,9 @@ limitations under the License.
#include "tensorflow/cc/saved_model/loader.h"
#include "tensorflow/cc/saved_model/constants.h"
#include "tensorflow/cc/saved_model/signature_constants.h"
#include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/example/feature.pb.h"
#include "tensorflow/core/framework/tensor_testutil.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/status_test_util.h"
@ -34,17 +37,35 @@ class LoaderTest : public ::testing::Test {
protected:
LoaderTest() {}
void CheckSavedModelBundle(const SavedModelBundle& bundle) {
// Validate the half plus two behavior.
Tensor input = test::AsTensor<float>({0, 1, 2, 3}, TensorShape({4, 1}));
string MakeSerializedExample(float x) {
tensorflow::Example example;
auto* feature_map = example.mutable_features()->mutable_feature();
(*feature_map)["x"].mutable_float_list()->add_value(x);
return example.SerializeAsString();
}
void CheckSavedModelBundle(const string& export_dir,
const SavedModelBundle& bundle) {
const string asset_path =
io::JoinPath(export_dir, kSavedModelAssetsDirectory, "foo.txt");
EXPECT_TRUE(Env::Default()->FileExists(asset_path));
// Retrieve the regression signature from meta graph def.
const auto signature_def_map = bundle.meta_graph_def.signature_def();
const auto signature_def = signature_def_map.at("regression");
const auto signature_def = signature_def_map.at(kRegressMethodName);
const string input_name = signature_def.inputs().at("input").name();
const string output_name = signature_def.outputs().at("output").name();
const string input_name = signature_def.inputs().at(kRegressInputs).name();
const string output_name =
signature_def.outputs().at(kRegressOutputs).name();
std::vector<string> serialized_examples;
for (float x : {0, 1, 2, 3}) {
serialized_examples.push_back(MakeSerializedExample(x));
}
// Validate the half plus two behavior.
Tensor input =
test::AsTensor<string>(serialized_examples, TensorShape({4}));
std::vector<Tensor> outputs;
TF_ASSERT_OK(bundle.session->Run({{input_name, input}}, {output_name}, {},
&outputs));
@ -65,11 +86,11 @@ TEST_F(LoaderTest, ResourceLeakTest) {
RunOptions run_options;
const string export_dir =
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded);
for (int i = 0; i < 100; ++i) {
TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir,
{kSavedModelTagServe}, &bundle));
CheckSavedModelBundle(bundle);
CheckSavedModelBundle(export_dir, bundle);
}
}
@ -79,10 +100,10 @@ TEST_F(LoaderTest, TagMatch) {
RunOptions run_options;
const string export_dir =
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded);
TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir,
{kSavedModelTagServe}, &bundle));
CheckSavedModelBundle(bundle);
CheckSavedModelBundle(export_dir, bundle);
}
TEST_F(LoaderTest, NoTagMatch) {
@ -91,7 +112,7 @@ TEST_F(LoaderTest, NoTagMatch) {
SessionOptions session_options;
const string export_dir =
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded);
Status st = LoadSavedModel(session_options, run_options, export_dir,
{"missing-tag"}, &bundle);
EXPECT_FALSE(st.ok());
@ -107,7 +128,7 @@ TEST_F(LoaderTest, NoTagMatchMultiple) {
SessionOptions session_options;
const string export_dir =
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded);
Status st = LoadSavedModel(session_options, run_options, export_dir,
{kSavedModelTagServe, "missing-tag"}, &bundle);
EXPECT_FALSE(st.ok());
@ -126,19 +147,19 @@ TEST_F(LoaderTest, PbtxtFormat) {
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPbTxt);
TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir,
{kSavedModelTagServe}, &bundle));
CheckSavedModelBundle(bundle);
CheckSavedModelBundle(export_dir, bundle);
}
TEST_F(LoaderTest, ShardedVariables) {
TEST_F(LoaderTest, SingleShardVariables) {
SavedModelBundle bundle;
SessionOptions session_options;
RunOptions run_options;
const string export_dir =
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded);
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir,
{kSavedModelTagServe}, &bundle));
CheckSavedModelBundle(bundle);
CheckSavedModelBundle(export_dir, bundle);
}
TEST_F(LoaderTest, InvalidExportPath) {
@ -156,7 +177,7 @@ TEST_F(LoaderTest, InvalidExportPath) {
TEST_F(LoaderTest, MaybeSavedModelDirectory) {
// Valid SavedModel directory.
const string export_dir =
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb);
io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded);
EXPECT_TRUE(MaybeSavedModelDirectory(export_dir));
// Directory that does not exist.
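Note on the new test helper: the serialized inputs that `MakeSerializedExample` builds are ordinary `tf.Example` protos with one float feature keyed `"x"`. A Python sketch of the same construction (the half_plus_two model regresses y = 0.5*x + 2, so inputs [0, 1, 2, 3] should yield [2, 2.5, 3, 3.5]):

```python
# Sketch: Python counterpart of the C++ MakeSerializedExample helper above.
import tensorflow as tf

def make_serialized_example(x):
    # One float feature keyed "x", matching the graph's ParseExample node.
    example = tf.train.Example(features=tf.train.Features(feature={
        "x": tf.train.Feature(float_list=tf.train.FloatList(value=[x])),
    }))
    return example.SerializeToString()

serialized_examples = [make_serialized_example(x) for x in [0., 1., 2., 3.]]
```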


@ -0,0 +1 @@
asset-file-contents


@ -1,2 +0,0 @@
model_checkpoint_path: "/tmp/saved_model/half_plus_two/variables/variables"
all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two/variables/variables"


@ -0,0 +1 @@
asset-file-contents


@ -140,6 +140,88 @@ meta_graphs {
op {
name: "NoOp"
}
op {
name: "ParseExample"
input_arg {
name: "serialized"
type: DT_STRING
}
input_arg {
name: "names"
type: DT_STRING
}
input_arg {
name: "sparse_keys"
type: DT_STRING
number_attr: "Nsparse"
}
input_arg {
name: "dense_keys"
type: DT_STRING
number_attr: "Ndense"
}
input_arg {
name: "dense_defaults"
type_list_attr: "Tdense"
}
output_arg {
name: "sparse_indices"
type: DT_INT64
number_attr: "Nsparse"
}
output_arg {
name: "sparse_values"
type_list_attr: "sparse_types"
}
output_arg {
name: "sparse_shapes"
type: DT_INT64
number_attr: "Nsparse"
}
output_arg {
name: "dense_values"
type_list_attr: "Tdense"
}
attr {
name: "Nsparse"
type: "int"
has_minimum: true
}
attr {
name: "Ndense"
type: "int"
has_minimum: true
}
attr {
name: "sparse_types"
type: "list(type)"
has_minimum: true
allowed_values {
list {
type: DT_FLOAT
type: DT_INT64
type: DT_STRING
}
}
}
attr {
name: "Tdense"
type: "list(type)"
has_minimum: true
allowed_values {
list {
type: DT_FLOAT
type: DT_INT64
type: DT_STRING
}
}
}
attr {
name: "dense_shapes"
type: "list(shape)"
has_minimum: true
}
}
op {
name: "Placeholder"
output_arg {
@ -160,33 +242,28 @@ meta_graphs {
}
}
op {
name: "RestoreSlice"
name: "RestoreV2"
input_arg {
name: "file_pattern"
name: "prefix"
type: DT_STRING
}
input_arg {
name: "tensor_name"
name: "tensor_names"
type: DT_STRING
}
input_arg {
name: "shape_and_slice"
name: "shape_and_slices"
type: DT_STRING
}
output_arg {
name: "tensor"
type_attr: "dt"
name: "tensors"
type_list_attr: "dtypes"
}
attr {
name: "dt"
type: "type"
}
attr {
name: "preferred_shard"
type: "int"
default_value {
i: -1
}
name: "dtypes"
type: "list(type)"
has_minimum: true
minimum: 1
}
}
op {
@ -214,6 +291,40 @@ meta_graphs {
minimum: 1
}
}
op {
name: "ShardedFilename"
input_arg {
name: "basename"
type: DT_STRING
}
input_arg {
name: "shard"
type: DT_INT32
}
input_arg {
name: "num_shards"
type: DT_INT32
}
output_arg {
name: "filename"
type: DT_STRING
}
}
op {
name: "ShardedFilespec"
input_arg {
name: "basename"
type: DT_STRING
}
input_arg {
name: "num_shards"
type: DT_INT32
}
output_arg {
name: "filename"
type: DT_STRING
}
}
op {
name: "Variable"
output_arg {
@ -524,7 +635,7 @@ meta_graphs {
}
}
node {
name: "x"
name: "tf_example"
op: "Placeholder"
attr {
key: "_output_shapes"
@ -539,7 +650,7 @@ meta_graphs {
attr {
key: "dtype"
value {
type: DT_FLOAT
type: DT_STRING
}
}
attr {
@ -550,6 +661,190 @@ meta_graphs {
}
}
}
node {
name: "ParseExample/Const"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
}
}
}
}
}
attr {
key: "dtype"
value {
type: DT_FLOAT
}
}
attr {
key: "value"
value {
tensor {
dtype: DT_FLOAT
tensor_shape {
dim {
}
}
}
}
}
}
node {
name: "ParseExample/ParseExample/names"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
}
}
}
}
}
attr {
key: "dtype"
value {
type: DT_STRING
}
}
attr {
key: "value"
value {
tensor {
dtype: DT_STRING
tensor_shape {
dim {
}
}
}
}
}
}
node {
name: "ParseExample/ParseExample/dense_keys_0"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
}
}
}
}
attr {
key: "dtype"
value {
type: DT_STRING
}
}
attr {
key: "value"
value {
tensor {
dtype: DT_STRING
tensor_shape {
}
string_val: "x"
}
}
}
}
node {
name: "ParseExample/ParseExample"
op: "ParseExample"
input: "tf_example"
input: "ParseExample/ParseExample/names"
input: "ParseExample/ParseExample/dense_keys_0"
input: "ParseExample/Const"
attr {
key: "Ndense"
value {
i: 1
}
}
attr {
key: "Nsparse"
value {
i: 0
}
}
attr {
key: "Tdense"
value {
list {
type: DT_FLOAT
}
}
}
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: -1
}
dim {
size: 1
}
}
}
}
}
attr {
key: "dense_shapes"
value {
list {
shape {
dim {
size: 1
}
}
}
}
}
attr {
key: "sparse_types"
value {
list {
}
}
}
}
node {
name: "x"
op: "Identity"
input: "ParseExample/ParseExample"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: -1
}
dim {
size: 1
}
}
}
}
}
}
node {
name: "Mul"
op: "Mul"
@ -566,7 +861,12 @@ meta_graphs {
value {
list {
shape {
unknown_rank: true
dim {
size: -1
}
dim {
size: 1
}
}
}
}
@ -588,7 +888,38 @@ meta_graphs {
value {
list {
shape {
unknown_rank: true
dim {
size: -1
}
dim {
size: 1
}
}
}
}
}
}
node {
name: "Identity"
op: "Identity"
input: "y"
attr {
key: "T"
value {
type: DT_FLOAT
}
}
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: -1
}
dim {
size: 1
}
}
}
}
@ -630,6 +961,82 @@ meta_graphs {
}
}
}
node {
name: "save/num_shards"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
}
}
}
}
attr {
key: "dtype"
value {
type: DT_INT32
}
}
attr {
key: "value"
value {
tensor {
dtype: DT_INT32
tensor_shape {
}
int_val: 1
}
}
}
}
node {
name: "save/ShardedFilename/shard"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
}
}
}
}
attr {
key: "dtype"
value {
type: DT_INT32
}
}
attr {
key: "value"
value {
tensor {
dtype: DT_INT32
tensor_shape {
}
int_val: 0
}
}
}
}
node {
name: "save/ShardedFilename"
op: "ShardedFilename"
input: "save/Const"
input: "save/ShardedFilename/shard"
input: "save/num_shards"
attr {
key: "_output_shapes"
value {
list {
shape {
}
}
}
}
}
node {
name: "save/save/tensor_names"
op: "Const"
@ -707,7 +1114,7 @@ meta_graphs {
node {
name: "save/save"
op: "SaveSlices"
input: "save/Const"
input: "save/ShardedFilename"
input: "save/save/tensor_names"
input: "save/save/shapes_and_slices"
input: "a"
@ -725,7 +1132,7 @@ meta_graphs {
node {
name: "save/control_dependency"
op: "Identity"
input: "save/Const"
input: "save/ShardedFilename"
input: "^save/save"
attr {
key: "T"
@ -737,7 +1144,7 @@ meta_graphs {
key: "_class"
value {
list {
s: "loc:@save/Const"
s: "loc:@save/ShardedFilename"
}
}
}
@ -752,13 +1159,32 @@ meta_graphs {
}
}
node {
name: "save/restore_slice/tensor_name"
name: "save/ShardedFilespec"
op: "ShardedFilespec"
input: "save/Const"
input: "save/num_shards"
input: "^save/control_dependency"
attr {
key: "_output_shapes"
value {
list {
shape {
}
}
}
}
}
node {
name: "save/RestoreV2/tensor_names"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: 1
}
}
}
}
@ -775,6 +1201,9 @@ meta_graphs {
tensor {
dtype: DT_STRING
tensor_shape {
dim {
size: 1
}
}
string_val: "a"
}
@ -782,13 +1211,16 @@ meta_graphs {
}
}
node {
name: "save/restore_slice/shape_and_slice"
name: "save/RestoreV2/shape_and_slices"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: 1
}
}
}
}
@ -805,6 +1237,9 @@ meta_graphs {
tensor {
dtype: DT_STRING
tensor_shape {
dim {
size: 1
}
}
string_val: ""
}
@ -812,11 +1247,11 @@ meta_graphs {
}
}
node {
name: "save/restore_slice"
op: "RestoreSlice"
name: "save/RestoreV2"
op: "RestoreV2"
input: "save/Const"
input: "save/restore_slice/tensor_name"
input: "save/restore_slice/shape_and_slice"
input: "save/RestoreV2/tensor_names"
input: "save/RestoreV2/shape_and_slices"
attr {
key: "_output_shapes"
value {
@ -828,15 +1263,11 @@ meta_graphs {
}
}
attr {
key: "dt"
key: "dtypes"
value {
type: DT_FLOAT
}
}
attr {
key: "preferred_shard"
value {
i: -1
list {
type: DT_FLOAT
}
}
}
}
@ -844,7 +1275,7 @@ meta_graphs {
name: "save/Assign"
op: "Assign"
input: "a"
input: "save/restore_slice"
input: "save/RestoreV2"
attr {
key: "T"
value {
@ -882,13 +1313,16 @@ meta_graphs {
}
}
node {
name: "save/restore_slice_1/tensor_name"
name: "save/RestoreV2_1/tensor_names"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: 1
}
}
}
}
@ -905,6 +1339,9 @@ meta_graphs {
tensor {
dtype: DT_STRING
tensor_shape {
dim {
size: 1
}
}
string_val: "b"
}
@ -912,13 +1349,16 @@ meta_graphs {
}
}
node {
name: "save/restore_slice_1/shape_and_slice"
name: "save/RestoreV2_1/shape_and_slices"
op: "Const"
attr {
key: "_output_shapes"
value {
list {
shape {
dim {
size: 1
}
}
}
}
@ -935,6 +1375,9 @@ meta_graphs {
tensor {
dtype: DT_STRING
tensor_shape {
dim {
size: 1
}
}
string_val: ""
}
@ -942,11 +1385,11 @@ meta_graphs {
}
}
node {
name: "save/restore_slice_1"
op: "RestoreSlice"
name: "save/RestoreV2_1"
op: "RestoreV2"
input: "save/Const"
input: "save/restore_slice_1/tensor_name"
input: "save/restore_slice_1/shape_and_slice"
input: "save/RestoreV2_1/tensor_names"
input: "save/RestoreV2_1/shape_and_slices"
attr {
key: "_output_shapes"
value {
@ -958,15 +1401,11 @@ meta_graphs {
}
}
attr {
key: "dt"
key: "dtypes"
value {
type: DT_FLOAT
}
}
attr {
key: "preferred_shard"
value {
i: -1
list {
type: DT_FLOAT
}
}
}
}
@ -974,7 +1413,7 @@ meta_graphs {
name: "save/Assign_1"
op: "Assign"
input: "b"
input: "save/restore_slice_1"
input: "save/RestoreV2_1"
attr {
key: "T"
value {
@ -1012,20 +1451,26 @@ meta_graphs {
}
}
node {
name: "save/restore_all"
name: "save/restore_shard"
op: "NoOp"
input: "^save/Assign"
input: "^save/Assign_1"
}
node {
name: "save/restore_all"
op: "NoOp"
input: "^save/restore_shard"
}
versions {
producer: 15
}
}
saver_def {
filename_tensor_name: "save/Const:0"
save_tensor_name: "save/control_dependency:0"
save_tensor_name: "save/ShardedFilespec:0"
restore_op_name: "save/restore_all"
max_to_keep: 5
sharded: true
keep_checkpoint_every_n_hours: 10000.0
version: V1
}
@ -1048,21 +1493,21 @@ meta_graphs {
}
}
signature_def {
key: "regression"
key: "tensorflow/serving/regress"
value {
inputs {
key: "input"
key: "inputs"
value {
name: "x:0"
name: "tf_example:0"
}
}
outputs {
key: "output"
key: "outputs"
value {
name: "y:0"
name: "Identity:0"
}
}
method_name: "regression"
method_name: "tensorflow/serving/regress"
}
}
}
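Note on the regenerated testdata graph: the pbtxt above now feeds a string placeholder `tf_example` through a `ParseExample` node into an `Identity` named `x`, and saves variables through a sharded saver. A hedged TF-0.x sketch of graph construction that yields this node structure (the variable values a=0.5, b=2.0 are assumed from the half_plus_two name; the actual export script is not part of this diff):

```python
# Sketch (assumptions noted above): rebuild the half_plus_two serving graph.
import tensorflow as tf

serialized = tf.placeholder(tf.string, name="tf_example")
parsed = tf.parse_example(
    serialized, {"x": tf.FixedLenFeature([1], tf.float32)})
x = tf.identity(parsed["x"], name="x")     # node "x" is now an Identity op
a = tf.Variable(0.5, name="a")
b = tf.Variable(2.0, name="b")
y = tf.add(tf.mul(a, x), b, name="y")      # y = 0.5 * x + 2
output = tf.identity(y, name="Identity")   # regression output "Identity:0"
saver = tf.train.Saver(sharded=True)       # emits ShardedFilename/ShardedFilespec
```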


@ -1,2 +0,0 @@
model_checkpoint_path: "/tmp/saved_model/half_plus_two_pbtxt/variables/variables-?????-of-00001"
all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two_pbtxt/variables/variables-?????-of-00001"


@ -0,0 +1 @@
asset-file-contents


@ -1,2 +0,0 @@
model_checkpoint_path: "/tmp/saved_model/half_plus_two/variables/variables-?????-of-00001"
all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two/variables/variables-?????-of-00001"


@ -97,6 +97,7 @@ from __future__ import print_function
# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member
from tensorflow.contrib.distributions.python.ops import bijector
from tensorflow.contrib.distributions.python.ops.bernoulli import *
from tensorflow.contrib.distributions.python.ops.beta import *
from tensorflow.contrib.distributions.python.ops.binomial import *


@ -245,6 +245,23 @@ class BetaTest(tf.test.TestCase):
stats.beta.var(a, b),
atol=1e-1)
# Test that sampling with the same seed twice gives the same results.
def testBetaSampleMultipleTimes(self):
with self.test_session():
a_val = 1.
b_val = 2.
n_val = 100
tf.set_random_seed(654321)
beta1 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta1")
samples1 = beta1.sample_n(n_val, seed=123456).eval()
tf.set_random_seed(654321)
beta2 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta2")
samples2 = beta2.sample_n(n_val, seed=123456).eval()
self.assertAllClose(samples1, samples2)
def testBetaSampleMultidimensional(self):
with self.test_session():
a = np.random.rand(3, 2, 2).astype(np.float32)


@ -23,9 +23,20 @@ import math
import numpy as np
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops.bijector import _Exp
from tensorflow.contrib.distributions.python.ops.bijector import _Identity
from tensorflow.contrib.distributions.python.ops.bijector import _ShiftAndScale
bijectors = tf.contrib.distributions.bijector
rng = np.random.RandomState(42)
class BaseBijectorTest(tf.test.TestCase):
"""Tests properties of the Bijector base-class."""
def testBijector(self):
with self.test_session():
with self.assertRaisesRegexp(
TypeError,
("Can't instantiate abstract class Bijector "
"with abstract methods __init__")):
bijectors.Bijector()
class IdentityBijectorTest(tf.test.TestCase):
@ -33,7 +44,7 @@ class IdentityBijectorTest(tf.test.TestCase):
def testBijector(self):
with self.test_session():
bijector = _Identity()
bijector = bijectors.Identity()
self.assertEqual("Identity", bijector.name)
x = [[[0.],
[1.]]]
@ -50,7 +61,7 @@ class ExpBijectorTest(tf.test.TestCase):
def testBijector(self):
with self.test_session():
bijector = _Exp(event_ndims=1)
bijector = bijectors.Exp(event_ndims=1)
self.assertEqual("Exp", bijector.name)
x = [[[1.],
[2.]]]
@ -63,14 +74,39 @@ class ExpBijectorTest(tf.test.TestCase):
self.assertAllClose([[0., -math.log(2.)]], jac.eval())
class _ShiftAndScaleBijectorTest(tf.test.TestCase):
class InlineBijectorTest(tf.test.TestCase):
"""Tests the correctness of the inline constructed bijector."""
def testBijector(self):
with self.test_session():
exp = bijectors.Exp(event_ndims=1)
inline = bijectors.Inline(
forward_fn=tf.exp,
inverse_fn=tf.log,
inverse_log_det_jacobian_fn=(
lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
name="Exp")
self.assertEqual(exp.name, inline.name)
x = [[[1., 2.],
[3., 4.],
[5., 6.]]]
self.assertAllClose(exp.forward(x).eval(), inline.forward(x).eval())
self.assertAllClose(exp.inverse(x).eval(), inline.inverse(x).eval())
self.assertAllClose(exp.inverse_log_det_jacobian(x).eval(),
inline.inverse_log_det_jacobian(x).eval())
class ScaleAndShiftBijectorTest(tf.test.TestCase):
"""Tests the correctness of the Y = scale * x + loc transformation."""
def testProperties(self):
with self.test_session():
mu = -1.
sigma = 2.
bijector = _ShiftAndScale(loc=mu, scale=sigma)
self.assertEqual("ShiftAndScale", bijector.name)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma)
self.assertEqual("ScaleAndShift", bijector.name)
def testNoBatchScalar(self):
with self.test_session() as sess:
@ -85,7 +121,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = -1.
sigma = 2. # Scalar.
bijector = _ShiftAndScale(loc=mu, scale=sigma)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma)
self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches"
self.assertEqual(0, bijector.shaper.event_ndims.eval()) # "is scalar"
x = [1., 2, 3] # Three scalar samples (no batches).
@ -107,7 +144,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = -1.
sigma = 2. # Scalar.
bijector = _ShiftAndScale(loc=mu, scale=sigma)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma)
self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches"
self.assertEqual(0, bijector.shaper.event_ndims.eval()) # "is scalar"
x = [[1., 2, 3],
@ -134,7 +172,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = [1.]
sigma = [1.] # One batch, scalar.
bijector = _ShiftAndScale(loc=mu, scale=sigma)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma)
self.assertEqual(
1, bijector.shaper.batch_ndims.eval()) # "one batch dim"
self.assertEqual(
@ -158,7 +197,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = [1., -1]
sigma = [1., 1] # Univariate, two batches.
bijector = _ShiftAndScale(loc=mu, scale=sigma)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma)
self.assertEqual(
1, bijector.shaper.batch_ndims.eval()) # "one batch dim"
self.assertEqual(
@ -182,7 +222,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = [1., -1]
sigma = np.eye(2, dtype=np.float32)
bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma, event_ndims=1)
self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches"
self.assertEqual(1, bijector.shaper.event_ndims.eval()) # "is vector"
x = [1., 1]
@ -205,7 +246,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = 1.
sigma = np.eye(2, dtype=np.float32)
bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma, event_ndims=1)
self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches"
self.assertEqual(1, bijector.shaper.event_ndims.eval()) # "is vector"
x = [1., 1]
@ -231,7 +273,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
feed_dict = {x: x_value, mu: mu_value, sigma: sigma_value, event_ndims:
event_ndims_value}
bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=event_ndims)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma, event_ndims=event_ndims)
self.assertEqual(0, sess.run(bijector.shaper.batch_ndims, feed_dict))
self.assertEqual(1, sess.run(bijector.shaper.event_ndims, feed_dict))
self.assertAllClose([[2., 0]], sess.run(bijector.forward(x), feed_dict))
@ -252,7 +295,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
for run in (static_run, dynamic_run):
mu = [[1., -1]]
sigma = np.array([np.eye(2, dtype=np.float32)])
bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma, event_ndims=1)
self.assertEqual(
1, bijector.shaper.batch_ndims.eval()) # "one batch dim"
self.assertEqual(
@ -276,7 +320,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
feed_dict = {x: x_value, mu: mu_value, sigma: sigma_value,
event_ndims: event_ndims_value}
bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=event_ndims)
bijector = bijectors.ScaleAndShift(
loc=mu, scale=sigma, event_ndims=event_ndims)
self.assertEqual(1, sess.run(bijector.shaper.batch_ndims, feed_dict))
self.assertEqual(1, sess.run(bijector.shaper.event_ndims, feed_dict))
self.assertAllClose([[[2., 0]]], sess.run(bijector.forward(x), feed_dict))
@ -285,5 +330,65 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase):
[0.], sess.run(bijector.inverse_log_det_jacobian(x), feed_dict))
class SoftplusBijectorTest(tf.test.TestCase):
"""Tests the correctness of the Y = g(X) = Log[1 + exp(X)] transformation."""
def _softplus(self, x):
return np.log(1 + np.exp(x))
def _softplus_inverse(self, y):
return np.log(np.exp(y) - 1)
def _softplus_ildj_before_reduction(self, y):
"""Inverse log det jacobian, before being reduced."""
return -np.log(1 - np.exp(-y))
def testBijectorForwardInverseEventDimsZero(self):
with self.test_session():
bijector = bijectors.Softplus(event_ndims=0)
self.assertEqual("Softplus", bijector.name)
x = 2 * rng.randn(2, 10)
y = self._softplus(x)
self.assertAllClose(y, bijector.forward(x).eval())
self.assertAllClose(x, bijector.inverse(y).eval())
self.assertAllClose(
x, bijector.inverse_and_inverse_log_det_jacobian(y)[0].eval())
def testBijectorLogDetJacobianEventDimsZero(self):
with self.test_session():
bijector = bijectors.Softplus(event_ndims=0)
y = 2 * rng.rand(2, 10)
# No reduction needed if event_dims = 0.
ildj = self._softplus_ildj_before_reduction(y)
self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval())
self.assertAllClose(
ildj, bijector.inverse_and_inverse_log_det_jacobian(y)[1].eval())
def testBijectorForwardInverseEventDimsOne(self):
with self.test_session():
bijector = bijectors.Softplus(event_ndims=1)
self.assertEqual("Softplus", bijector.name)
x = 2 * rng.randn(2, 10)
y = self._softplus(x)
self.assertAllClose(y, bijector.forward(x).eval())
self.assertAllClose(x, bijector.inverse(y).eval())
self.assertAllClose(
x, bijector.inverse_and_inverse_log_det_jacobian(y)[0].eval())
def testBijectorLogDetJacobianEventDimsOne(self):
with self.test_session():
bijector = bijectors.Softplus(event_ndims=1)
y = 2 * rng.rand(2, 10)
ildj_before = self._softplus_ildj_before_reduction(y)
ildj = np.sum(ildj_before, axis=1)
self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval())
self.assertAllClose(
ildj, bijector.inverse_and_inverse_log_det_jacobian(y)[1].eval())
if __name__ == "__main__":
tf.test.main()
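The log-likelihood pattern from the `Bijector` docstring (see bijector.py below) can be checked numerically with the now-public `Exp`; a sketch recovering the log-normal density at a point (values are arbitrary):

```python
# Sketch: log p_Y(y) = ildj(y) + log p_X(g^{-1}(y)) for Y = exp(X), X ~ N(0, 1).
import tensorflow as tf

bijectors = tf.contrib.distributions.bijector
normal = tf.contrib.distributions.Normal(mu=0., sigma=1.)
exp = bijectors.Exp(event_ndims=0)

y = tf.constant([2.0])
log_pdf = exp.inverse_log_det_jacobian(y) + normal.log_pdf(exp.inverse(y))
with tf.Session() as sess:
    print(sess.run(log_pdf))  # ~= [-1.852]: log((1/2) * NormalPdf(log 2; 0, 1))
```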


@ -334,6 +334,32 @@ class MixtureTest(tf.test.TestCase):
which_dist_samples = dist_sample_values[c][:size_c]
self.assertAllClose(which_dist_samples, sample_values[which_c])
# Test that sampling with the same seed twice gives the same results.
def testSampleMultipleTimes(self):
# 5 component mixture.
logits = [-10.0, -5.0, 0.0, 5.0, 10.0]
mus = [-5.0, 0.0, 5.0, 4.0, 20.0]
sigmas = [0.1, 5.0, 3.0, 0.2, 4.0]
with self.test_session():
n = 100
tf.set_random_seed(654321)
components = [distributions_py.Normal(
mu=mu, sigma=sigma) for mu, sigma in zip(mus, sigmas)]
cat = distributions_py.Categorical(logits, dtype=tf.int32, name="cat1")
dist1 = distributions_py.Mixture(cat, components, name="mixture1")
samples1 = dist1.sample_n(n, seed=123456).eval()
tf.set_random_seed(654321)
components2 = [distributions_py.Normal(
mu=mu, sigma=sigma) for mu, sigma in zip(mus, sigmas)]
cat2 = distributions_py.Categorical(logits, dtype=tf.int32, name="cat2")
dist2 = distributions_py.Mixture(cat2, components2, name="mixture2")
samples2 = dist2.sample_n(n, seed=123456).eval()
self.assertAllClose(samples1, samples2)
def testSampleScalarBatchMultivariate(self):
with self.test_session() as sess:
num_components = 3


@ -108,20 +108,54 @@ class StudentTTest(tf.test.TestCase):
df_v = 4.0
mu_v = 3.0
sigma_v = np.sqrt(10.0)
n = tf.constant(100000)
n = tf.constant(200000)
student = tf.contrib.distributions.StudentT(df=df, mu=mu, sigma=sigma)
samples = student.sample_n(n, seed=137)
samples = student.sample_n(n)
sample_values = samples.eval()
n = 100000
self.assertEqual(sample_values.shape, (n,))
self.assertAllClose(sample_values.mean(), mu_v, atol=1e-2)
n_val = 200000
self.assertEqual(sample_values.shape, (n_val,))
self.assertAllClose(sample_values.mean(), mu_v, rtol=1e-2, atol=0)
self.assertAllClose(sample_values.var(),
sigma_v**2 * df_v / (df_v - 2),
atol=.25)
rtol=1e-2, atol=0)
self._checkKLApprox(df_v, mu_v, sigma_v, sample_values)
def _testStudentSampleMultiDimensional(self):
# DISABLED: Please enable this test once b/issues/30149644 is resolved.
# Test that sampling with the same seed twice gives the same results.
def testStudentSampleMultipleTimes(self):
with self.test_session():
df = tf.constant(4.0)
mu = tf.constant(3.0)
sigma = tf.constant(math.sqrt(10.0))
df_v = 4.0
mu_v = 3.0
sigma_v = np.sqrt(10.0)
n = tf.constant(100)
tf.set_random_seed(654321)
student = tf.contrib.distributions.StudentT(
df=df, mu=mu, sigma=sigma, name="student_t1")
samples1 = student.sample_n(n, seed=123456).eval()
tf.set_random_seed(654321)
student2 = tf.contrib.distributions.StudentT(
df=df, mu=mu, sigma=sigma, name="student_t2")
samples2 = student2.sample_n(n, seed=123456).eval()
self.assertAllClose(samples1, samples2)
def testStudentSampleSmallDfNoNan(self):
with self.test_session():
df_v = [1e-1, 1e-5, 1e-10, 1e-20]
df = tf.constant(df_v)
n = tf.constant(200000)
student = tf.contrib.distributions.StudentT(df=df, mu=1.0, sigma=1.0)
samples = student.sample_n(n)
sample_values = samples.eval()
n_val = 200000
self.assertEqual(sample_values.shape, (n_val, 4))
self.assertTrue(np.all(np.logical_not(np.isnan(sample_values))))
def testStudentSampleMultiDimensional(self):
with self.test_session():
batch_size = 7
df = tf.constant([[3.0, 7.0]] * batch_size)
@ -130,20 +164,22 @@ class StudentTTest(tf.test.TestCase):
df_v = [3.0, 7.0]
mu_v = [3.0, -3.0]
sigma_v = [np.sqrt(10.0), np.sqrt(15.0)]
n = tf.constant(100000)
n = tf.constant(200000)
student = tf.contrib.distributions.StudentT(df=df, mu=mu, sigma=sigma)
samples = student.sample_n(n)
sample_values = samples.eval()
self.assertEqual(samples.get_shape(), (100000, batch_size, 2))
self.assertAllClose(sample_values[:, 0, 0].mean(), mu_v[0], atol=.15)
self.assertEqual(samples.get_shape(), (200000, batch_size, 2))
self.assertAllClose(
sample_values[:, 0, 0].mean(), mu_v[0], rtol=1e-2, atol=0)
self.assertAllClose(sample_values[:, 0, 0].var(),
sigma_v[0]**2 * df_v[0] / (df_v[0] - 2),
atol=1)
rtol=1e-1, atol=0)
self._checkKLApprox(df_v[0], mu_v[0], sigma_v[0], sample_values[:, 0, 0])
self.assertAllClose(sample_values[:, 0, 1].mean(), mu_v[1], atol=.01)
self.assertAllClose(
sample_values[:, 0, 1].mean(), mu_v[1], rtol=1e-2, atol=0)
self.assertAllClose(sample_values[:, 0, 1].var(),
sigma_v[1]**2 * df_v[1] / (df_v[1] - 2),
atol=.25)
rtol=1e-1, atol=0)
self._checkKLApprox(df_v[0], mu_v[0], sigma_v[0], sample_values[:, 0, 1])
def _checkKLApprox(self, df, mu, sigma, samples):
@ -337,8 +373,7 @@ class StudentTTest(tf.test.TestCase):
mode = student.mode().eval()
self.assertAllClose([-1., 0, 1], mode)
def _testPdfOfSample(self):
# DISABLED: Please enable this test once b/issues/30149644 is resolved.
def testPdfOfSample(self):
with self.test_session() as sess:
student = tf.contrib.distributions.StudentT(df=3., mu=np.pi, sigma=1.)
num = 20000
@ -357,8 +392,7 @@ class StudentTTest(tf.test.TestCase):
# Verify integral over sample*pdf ~= 1.
self._assertIntegral(sample_vals, pdf_vals)
def _testPdfOfSampleMultiDims(self):
# DISABLED: Please enable this test once b/issues/30149644 is resolved.
def testPdfOfSampleMultiDims(self):
with self.test_session() as sess:
student = tf.contrib.distributions.StudentT(df=[7., 11.],
mu=[[5.], [6.]],


@ -33,12 +33,8 @@ class TransformedDistributionTest(tf.test.TestCase):
# Note: the Jacobian callable only works for this example; more generally
# you may or may not need a reduce_sum.
log_normal = tf.contrib.distributions.TransformedDistribution(
base_dist_cls=tf.contrib.distributions.Normal,
mu=mu,
sigma=sigma,
transform=lambda x: tf.exp(x),
inverse=lambda y: tf.log(y),
log_det_jacobian=(lambda x: x))
base_distribution=tf.contrib.distributions.Normal(mu=mu, sigma=sigma),
bijector=tf.contrib.distributions.bijector.Exp(event_ndims=0))
sp_dist = stats.lognorm(s=sigma, scale=np.exp(mu))
# sample
@ -67,12 +63,8 @@ class TransformedDistributionTest(tf.test.TestCase):
mu = 3.0
sigma = 0.02
log_normal = tf.contrib.distributions.TransformedDistribution(
base_dist_cls=tf.contrib.distributions.Normal,
mu=mu,
sigma=sigma,
transform=lambda x: tf.exp(x),
inverse=None,
log_det_jacobian=(lambda x: tf.reduce_sum(x)))
base_distribution=tf.contrib.distributions.Normal(mu=mu, sigma=sigma),
bijector=tf.contrib.distributions.bijector.Exp(event_ndims=0))
sample = log_normal.sample_n(1)
sample_val, log_pdf_val = sess.run([sample, log_normal.log_pdf(sample)])
@ -82,10 +74,6 @@ class TransformedDistributionTest(tf.test.TestCase):
log_pdf_val,
atol=1e-2)
with self.assertRaisesRegexp(ValueError,
"was not returned from `sample`"):
log_normal.log_pdf(tf.constant(3.0))
if __name__ == "__main__":
tf.test.main()


@ -149,6 +149,30 @@ class WishartCholeskyTest(tf.test.TestCase):
variance_estimate,
rtol=0.05)
# Test that sampling with the same seed twice gives the same results.
def testSampleMultipleTimes(self):
with self.test_session():
df = 4.
n_val = 100
tf.set_random_seed(654321)
chol_w1 = distributions.WishartCholesky(
df=df,
scale=chol(make_pd(1., 3)),
cholesky_input_output_matrices=False,
name="wishart1")
samples1 = chol_w1.sample_n(n_val, seed=123456).eval()
tf.set_random_seed(654321)
chol_w2 = distributions.WishartCholesky(
df=df,
scale=chol(make_pd(1., 3)),
cholesky_input_output_matrices=False,
name="wishart2")
samples2 = chol_w2.sample_n(n_val, seed=123456).eval()
self.assertAllClose(samples1, samples2)
def testProb(self):
with self.test_session():
# Generate some positive definite (pd) matrices and their Cholesky


@ -197,7 +197,8 @@ class Beta(distribution.Distribution):
gamma1_sample = random_ops.random_gamma(
[n,], a, dtype=self.dtype, seed=seed)
gamma2_sample = random_ops.random_gamma(
[n,], b, dtype=self.dtype, seed=seed)
[n,], b, dtype=self.dtype,
seed=distribution_util.gen_new_seed(seed, "beta"))
beta_sample = gamma1_sample / (gamma1_sample + gamma2_sample)
return beta_sample
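Context for this fix: both gamma draws previously received the identical `seed`, so with a fixed seed the two streams were not independent. Salting keeps sampling reproducible while decoupling the draws; a sketch of the derived seed (using `gen_new_seed`, added in distribution_util.py below):

```python
# Sketch: the second gamma draw now uses a distinct, deterministic seed.
from tensorflow.contrib.distributions.python.ops import distribution_util

seed = 123456
seed_b = distribution_util.gen_new_seed(seed, salt="beta")
assert seed_b != seed                                          # separate stream
assert seed_b == distribution_util.gen_new_seed(seed, "beta")  # reproducible
```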


@ -12,12 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""An API for reversible (bijective) transformations of random variables."""
r"""Bijector Ops.
An API for reversible (bijective) transformations of random variables.
## Background
Differentiable, bijective transformations of continuous random variables alter
the calculations made in the cumulative/probability distribution functions and
sample function. This module provides a standard interface for making these
manipulations.
For more details and examples, see the `Bijector` docstring.
To apply a `Bijector`, use `distributions.TransformedDistribution`.
## Bijectors
@@Bijector
@@Identity
@@Inline
@@Exp
@@ScaleAndShift
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
import contextlib
import six
from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape
from tensorflow.python.framework import constant_op
@ -26,40 +51,43 @@ from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import linalg_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
class _Bijector(object):
"""An interface for transforming a `Distribution` `Tensor`.
Recall that a `Distribution` `Tensor` has dimensions which have `sample`,
`batch`, and `event` semantics. (See `DistributionShape` for more details.)
@six.add_metaclass(abc.ABCMeta)
class Bijector(object):
"""Interface for transforming a `Distribution` via `TransformedDistribution`.
A `Bijector` implements a bijective, differentiable function by transforming
an input `Tensor`. The output `Tensor` shape is constrained by the input
`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three
operations:
(1) Forward Evaluation
Useful for turning one random outcome into another random outcome from a
different distribution.
1. Forward Evaluation
(2) Inverse Evaluation
Useful for "reversing" a transformation to compute one probability in
terms of another.
Useful for turning one random outcome into another random outcome from a
different distribution.
(3) (log o det o Jacobian o inverse)(x)
"The log of the determinant of the matrix of all first-order partial
derivatives of the inverse function."
Useful for inverting a transformation to compute one probability in terms
of another. Geometrically, the det(Jacobian) is the volume of the
transformation and is used to scale the probability.
2. Inverse Evaluation
Useful for "reversing" a transformation to compute one probability in
terms of another.
3. (log o det o Jacobian o inverse)(x)
"The log of the determinant of the matrix of all first-order partial
derivatives of the inverse function."
Useful for inverting a transformation to compute one probability in terms
of another. Geometrically, the det(Jacobian) is the volume of the
transformation and is used to scale the probability.
By convention, transformations of random variables are named in terms of the
forward transformation. The forward transformation creates samples; the
inverse is useful for computing probabilities.
Example Use:
Basic properties:
- Basic properties:
```python
x = ... # A tensor.
@ -69,7 +97,7 @@ class _Bijector(object):
x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)).
```
Computing a log-likelihood:
- Computing a log-likelihood:
```python
def transformed_log_pdf(bijector, log_pdf, x):
@ -77,7 +105,7 @@ class _Bijector(object):
log_pdf(bijector.inverse(x)))
```
Transforming a random outcome:
- Transforming a random outcome:
```python
def transformed_sample(bijector, x):
@ -85,7 +113,8 @@ class _Bijector(object):
```
Example transformations:
"Exponential"
- "Exponential"
```
Y = g(X) = exp(X)
@ -102,7 +131,7 @@ class _Bijector(object):
= (1 / y) Normal(log(y); 0, 1)
```
"ShiftAndScale"
- "ScaleAndShift"
```
Y = g(X) = sqrtSigma * X + mu
@ -122,7 +151,8 @@ class _Bijector(object):
Example of why a `Bijector` needs to understand sample, batch, event
partitioning:
Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch,
- Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch,
and event (S, B, E) shape semantics. Suppose
the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`.
@ -132,24 +162,25 @@ class _Bijector(object):
over the event dimensions.
Subclass Requirements:
Subclasses are expected to implement `_forward` and one or both of:
- Subclasses are expected to implement `_forward` and one or both of:
- `_inverse`, `_inverse_log_det_jacobian`,
- `_inverse_and_inverse_log_det_jacobian`.
If computation can be shared among `_inverse` and
- If computation can be shared among `_inverse` and
`_inverse_log_det_jacobian` it is preferable to implement
`_inverse_and_inverse_log_det_jacobian`. This usually reduces
graph-construction overhead because a `Distribution`'s implementation of
`log_prob` will need to evaluate both the inverse Jacobian as well as the
inverse function.
If an additional use case needs just `inverse` or just
- If an additional use case needs just `inverse` or just
`inverse_log_det_jacobian` then he or she may also wish to implement these
functions to avoid computing the `inverse_log_det_jacobian` or the
`inverse`, respectively.
"""
# TODO(b/30476956): Try to remove constructor dependence on ndims.
@abc.abstractmethod
def __init__(self,
batch_ndims=None,
event_ndims=None,
@ -236,6 +267,9 @@ class _Bijector(object):
"""Returns the string name of this `Bijector`."""
return self._name
def _forward(self, x):
raise NotImplementedError("forward is not implemented.")
def forward(self, x, name="forward"):
"""Returns the forward `Bijector` evaluation, i.e., X = g(Y).
@ -249,13 +283,16 @@ class _Bijector(object):
Raises:
TypeError: if `self.dtype` is specified and `x.dtype` is not
`self.dtype`.
AttributeError: if `_forward` is not implemented.
NotImplementedError: if `_forward` is not implemented.
"""
with self._name_scope(name, [x]):
x = ops.convert_to_tensor(x, name="x")
self._maybe_assert_dtype(x)
return self._forward(x)
def _inverse(self, x):
raise NotImplementedError("inverse is not implemented")
def inverse(self, x, name="inverse"):
"""Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y).
@ -269,7 +306,7 @@ class _Bijector(object):
Raises:
TypeError: if `self.dtype` is specified and `x.dtype` is not
`self.dtype`.
AttributeError: if neither `_inverse` nor
NotImplementedError: if neither `_inverse` nor
`_inverse_and_inverse_log_det_jacobian` are implemented.
"""
with self._name_scope(name, [x]):
@ -277,11 +314,14 @@ class _Bijector(object):
self._maybe_assert_dtype(x)
try:
return self._inverse(x)
except AttributeError:
except NotImplementedError:
# Since _inverse was not implemented, try to see if it's implemented
# by the _inverse_and_inverse_log_det_jacobian member.
return self._inverse_and_inverse_log_det_jacobian(x)[0]
def _inverse_log_det_jacobian(self, x):
raise NotImplementedError("inverse_log_det_jacobian is not implemented")
def inverse_log_det_jacobian(self, x, name="inverse_log_det_jacobian"):
"""Returns the (log o det o Jacobian o inverse)(x).
@ -300,7 +340,7 @@ class _Bijector(object):
Raises:
TypeError: if `self.dtype` is specified and `x.dtype` is not
`self.dtype`.
AttributeError: if neither `_inverse_log_det_jacobian` nor
NotImplementedError: if neither `_inverse_log_det_jacobian` nor
`_inverse_and_inverse_log_det_jacobian` are implemented.
"""
with self._name_scope(name, [x]):
@ -308,11 +348,15 @@ class _Bijector(object):
self._maybe_assert_dtype(x)
try:
return self._inverse_log_det_jacobian(x)
except AttributeError:
except NotImplementedError:
# Since _inverse_log_det_jacobian was not implemented, try to see if
# it's implemented by the _inverse_and_inverse_log_det_jacobian member.
return self._inverse_and_inverse_log_det_jacobian(x)[1]
def _inverse_and_inverse_log_det_jacobian(self, x):
raise NotImplementedError(
"inverse_and_inverse_log_det_jacobian is not implemented.")
def inverse_and_inverse_log_det_jacobian(
self, x, name="inverse_and_inverse_log_det_jacobian"):
"""Returns both the inverse evaluation and inverse_log_det_jacobian.
@ -332,15 +376,15 @@ class _Bijector(object):
Raises:
TypeError: if `self.dtype` is specified and `x.dtype` is not
`self.dtype`.
AttributeError: if neither `_inverse_and_inverse_log_det_jacobian` nor
{`_inverse`, `_inverse_log_det_jacobian`} are implemented.
NotImplementedError: if neither `_inverse_and_inverse_log_det_jacobian`
nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented.
"""
with self._name_scope(name, [x]):
x = ops.convert_to_tensor(x, name="x")
self._maybe_assert_dtype(x)
try:
return self._inverse_and_inverse_log_det_jacobian(x)
except AttributeError:
except NotImplementedError:
# Since _inverse_and_inverse_log_det_jacobian was not implemented, try
# to see if we can separately use _inverse and
# _inverse_log_det_jacobian members.
@ -361,7 +405,7 @@ class _Bijector(object):
(self.dtype, x.dtype))
class _Identity(_Bijector):
class Identity(Bijector):
"""Bijector which computes Y = g(X) = X.
Example Use:
@ -378,7 +422,7 @@ class _Identity(_Bijector):
"""
def __init__(self, validate_args=False, name="Identity"):
super(_Identity, self).__init__(
super(Identity, self).__init__(
batch_ndims=0,
event_ndims=0,
is_constant_jacobian=True,
@ -396,7 +440,59 @@ class _Identity(_Bijector):
return constant_op.constant(0., dtype=x.dtype)
class _Exp(_Bijector):
class Inline(Bijector):
# pylint: disable=line-too-long
"""Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian.
Example Use:
```python
exp = Inline(
forward_fn=tf.exp,
inverse_fn=tf.log,
inverse_log_det_jacobian_fn=(
lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
name="Exp")
```
The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`.
"""
# pylint: enable=line-too-long
def __init__(self, forward_fn, inverse_fn, inverse_log_det_jacobian_fn,
is_constant_jacobian=False, name="Inline"):
"""Creates a `Bijector` from callables.
Args:
forward_fn: Python callable implementing the forward transformation.
inverse_fn: Python callable implementing the inverse transformation.
inverse_log_det_jacobian_fn: Python callable implementing the
inverse_log_det_jacobian transformation.
is_constant_jacobian: `Boolean` indicating that the Jacobian is constant
for all input arguments.
name: `String`, name given to ops managed by this object.
"""
super(Inline, self).__init__(
batch_ndims=0,
event_ndims=0,
is_constant_jacobian=is_constant_jacobian,
validate_args=False,
name=name)
self._forward_fn = forward_fn
self._inverse_fn = inverse_fn
self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn
def _forward(self, x):
return self._forward_fn(x)
def _inverse(self, y):
return self._inverse_fn(y)
def _inverse_log_det_jacobian(self, y):
return self._inverse_log_det_jacobian_fn(y)
class Exp(Bijector):
"""Bijector which computes Y = g(X) = exp(X).
Example Use:
@ -417,12 +513,11 @@ class _Exp(_Bijector):
over the event space.
"""
# TODO(b/30476956): Try to remove constructor dependence on ndims.
def __init__(self,
event_ndims=0,
validate_args=False,
name="Exp"):
super(_Exp, self).__init__(
super(Exp, self).__init__(
batch_ndims=0,
event_ndims=event_ndims,
validate_args=validate_args,
@ -448,7 +543,7 @@ class _Exp(_Bijector):
return y, -math_ops.reduce_sum(y, reduction_indices=event_dims)
class _ShiftAndScale(_Bijector):
class ScaleAndShift(Bijector):
"""Bijector which computes Y = g(X; loc, scale) = scale * X + loc.
Example Use:
@ -457,35 +552,35 @@ class _ShiftAndScale(_Bijector):
# No batch, scalar.
mu = 0 # shape=[]
sigma = 1 # shape=[]
b = ShiftAndScale(loc=mu, scale=sigma)
b = ScaleAndShift(loc=mu, scale=sigma)
# b.shaper.batch_ndims == 0
# b.shaper.event_ndims == 0
# One batch, scalar.
mu = ... # shape=[b], b>0
sigma = ... # shape=[b], b>0
b = ShiftAndScale(loc=mu, scale=sigma)
b = ScaleAndShift(loc=mu, scale=sigma)
# b.shaper.batch_ndims == 1
# b.shaper.event_ndims == 0
# No batch, multivariate.
mu = ... # shape=[d], d>0
sigma = ... # shape=[d, d], d>0
b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1)
b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1)
# b.shaper.batch_ndims == 0
# b.shaper.event_ndims == 1
# (B1*B2*...*Bb)-batch, multivariate.
mu = ... # shape=[B1,...,Bb, d], b>0, d>0
sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0
b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1)
b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1)
# b.shaper.batch_ndims == b
# b.shaper.event_ndims == 1
# Mu is broadcast:
mu = 1
sigma = [I, I] # I is a 3x3 identity matrix.
b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1)
b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1)
x = numpy.ones(S + sigma.shape)
b.forward(x) # == x + 1
```
@ -497,7 +592,7 @@ class _ShiftAndScale(_Bijector):
scale,
event_ndims=0,
validate_args=False,
name="ShiftAndScale"):
name="ScaleAndShift"):
self._parameters = {}
self._name = name
with self._name_scope("init", values=[loc, scale, event_ndims]):
@ -512,7 +607,7 @@ class _ShiftAndScale(_Bijector):
raise TypeError("%s.dtype=%s does not match %s" %
(event_ndims.name, event_ndims.dtype, dtypes.int32))
self._scale, batch_ndims = self._process_scale(self.scale, event_ndims)
super(_ShiftAndScale, self).__init__(
super(ScaleAndShift, self).__init__(
batch_ndims=batch_ndims,
event_ndims=event_ndims,
parameters={"loc": self.loc, "scale": self.scale},
@ -590,3 +685,77 @@ class _ShiftAndScale(_Bijector):
return -math_ops.reduce_sum(
math_ops.log(array_ops.matrix_diag_part(self.scale)),
reduction_indices=[-1])
class Softplus(Bijector):
"""Bijector which computes `Y = g(X) = Log[1 + exp(X)]`.
The softplus `Bijector` has the following two useful properties:
* The range is the positive real numbers (softplus maps all reals to positives)
* `softplus(x) approx x`, for large `x`, so it does not overflow as easily as
the `Exp` `Bijector`.
Example Use:
```python
# Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1
# batch ndim and 2 event ndims (i.e., vector of matrices).
softplus = Softplus(batch_ndims=1, event_ndims=2)
x = [[[1., 2],
[3, 4]],
[[5, 6],
[7, 8]]]
log(1 + exp(x)) == softplus.forward(x)
log(exp(x) - 1) == softplus.inverse(x)
```
Note: log(.) and exp(.) are applied element-wise but the Jacobian is a
reduction over the event space.
"""
def __init__(self,
event_ndims=0,
validate_args=False,
name="Softplus"):
super(Softplus, self).__init__(
batch_ndims=0,
event_ndims=event_ndims,
validate_args=validate_args,
name=name)
def _forward(self, x):
return nn_ops.softplus(x)
def _inverse(self, x):
# The most stable inverse of softplus is not the most direct one.
# y = softplus(x) = Log[1 + exp{x}], (which means y > 0).
# ==> exp{y} = 1 + exp{x}
# ==> x = Log[exp{y} - 1]
# = Log[(exp{y} - 1) / exp{y}] + Log[exp{y}]
# = Log[(1 - exp{-y}) / 1] + Log[exp{y}]
# = Log[1 - exp{-y}] + y
# Recalling y > 0, you see that this is more stable than Log[exp{y} - 1].
return x + math_ops.log(1. - math_ops.exp(-x))
def _inverse_log_det_jacobian(self, x):
# Stable inverse log det jacobian.
# Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1]
# ==> dX/dY = exp{Y} / (exp{Y} - 1)
# = 1 / (1 - exp{-Y}),
# which is the most stable for Y > 0.
if self.shaper is None:
raise ValueError("Jacobian cannot be computed with unknown event_ndims")
_, _, event_dims = self.shaper.get_dims(x)
return -math_ops.reduce_sum(
math_ops.log(1. - math_ops.exp(-x)), reduction_indices=event_dims)
def _inverse_and_inverse_log_det_jacobian(self, x):
if self.shaper is None:
raise ValueError("Jacobian cannot be computed with unknown event_ndims")
_, _, event_dims = self.shaper.get_dims(x)
log_one_minus_exp_neg = math_ops.log(1. - math_ops.exp(-x))
y = x + log_one_minus_exp_neg
ildj = -math_ops.reduce_sum(
log_one_minus_exp_neg, reduction_indices=event_dims)
return y, ildj
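A quick NumPy check of the stable `Softplus` inverse derived in the comments above (a sketch, not part of the change):

```python
# Sketch: y + log(1 - exp(-y)) agrees with log(exp(y) - 1) where the naive
# form is representable, and stays finite where exp(y) would overflow.
import numpy as np

y = np.array([1e-3, 1.0, 30.0, 800.0])
stable = y + np.log(1.0 - np.exp(-y))
naive = np.log(np.expm1(y[:3]))        # exp(800) overflows float64, so skip it
print(np.allclose(stable[:3], naive))  # True
print(stable[3])                       # 800.0, still finite
```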


@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
import functools
import hashlib
import sys
import numpy as np
@ -197,8 +198,8 @@ def log_combinations(n, counts, name="log_combinations"):
# The sum should be along the last dimension of counts. This is the
# "distribution" dimension. Here n a priori represents the sum of counts.
with ops.name_scope(name, values=[n, counts]):
n = array_ops.identity(n, name="n")
counts = array_ops.identity(counts, name="counts")
n = ops.convert_to_tensor(n, name="n")
counts = ops.convert_to_tensor(counts, name="counts")
total_permutations = math_ops.lgamma(n + 1)
counts_factorial = math_ops.lgamma(counts + 1)
redundant_permutations = math_ops.reduce_sum(counts_factorial,
@ -397,6 +398,14 @@ def pick_vector(cond,
[math_ops.select(cond, n, -1)])
def gen_new_seed(seed, salt):
"""Generate a new seed, from the given seed and salt."""
if seed:
string = (str(seed) + salt).encode("utf-8")
return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF
return None
def override_docstring_if_empty(fn, doc_str):
"""Override the `doc_str` argument to `fn.__doc__`.


@ -22,6 +22,7 @@ import numpy as np
from tensorflow.contrib.distributions.python.ops import categorical
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
@ -295,8 +296,10 @@ class Mixture(distribution.Distribution):
partitions=cat_samples,
num_partitions=self.num_components)
samples_class = [None for _ in range(self.num_components)]
for c in range(self.num_components):
n_class = array_ops.size(partitioned_samples_indices[c])
seed = distribution_util.gen_new_seed(seed, "mixture")
samples_class_c = self.components[c].sample_n(n_class, seed=seed)
# Pull out the correct batch entries from each index.


@ -177,22 +177,17 @@ class StudentT(distribution.Distribution):
return tensor_shape.scalar()
def _sample_n(self, n, seed=None):
# We use 2 uniform random floats to generate polar random variates.
# http://dl.acm.org/citation.cfm?id=179631
# Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1].
# Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0.
# Let X = R*cos(theta), and let Y = R*sin(theta).
# Then X ~ t_df and Y ~ t_df.
# The variates X and Y are not independent.
shape = array_ops.concat(0, ([2, n], self.batch_shape()))
uniform = random_ops.random_uniform(shape=shape,
dtype=self.dtype,
seed=seed)
samples_g, samples_h = array_ops.unpack(uniform, num=2)
theta = (2. * math.pi) * samples_h
r = math_ops.sqrt(self.df *
(math_ops.pow(samples_g, -2 / self.df) - 1))
samples = r * math_ops.cos(theta)
# The sampling method comes from the well known fact that if X ~ Normal(0,
# 1), and Z ~ Chi2(df), then X / sqrt(Z / df) ~ StudentT(df).
shape = array_ops.concat(0, ([n], self.batch_shape()))
normal_sample = random_ops.random_normal(
shape, dtype=self.dtype, seed=seed)
half = constant_op.constant(0.5, self.dtype)
df = self.df * array_ops.ones(self.batch_shape(), dtype=self.dtype)
gamma_sample = random_ops.random_gamma(
[n,], half * df, beta=half, dtype=self.dtype,
seed=distribution_util.gen_new_seed(seed, salt="student_t"))
samples = normal_sample / math_ops.sqrt(gamma_sample / df)
return samples * self.sigma + self.mu
def _log_prob(self, x):
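The representation in the new sampler's comment is easy to verify numerically; a NumPy sketch (`Gamma(df/2, scale=2)` is the `Chi2(df)` draw the code performs via `random_gamma` with `beta=0.5`):

```python
# Sketch: X ~ Normal(0, 1), Z ~ Chi2(df)  =>  X / sqrt(Z / df) ~ StudentT(df).
import numpy as np

rng = np.random.RandomState(0)
df, n = 5.0, 200000
x = rng.standard_normal(n)
z = rng.gamma(shape=df / 2.0, scale=2.0, size=n)  # Chi2(df)
t = x / np.sqrt(z / df)
print(np.isclose(t.var(), df / (df - 2.0), rtol=0.05))  # t variance: df/(df-2)
```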


@ -26,107 +26,139 @@ from tensorflow.python.ops import math_ops
class TransformedDistribution(distribution.Distribution):
"""A Transformed Distribution.
A Transformed Distribution models `p(y)` given a base distribution `p(x)`,
an invertible transform, `y = f(x)`, and the determinant of the Jacobian of
`f(x)`.
A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and
a deterministic, invertible, differentiable transform, `Y = g(X)`. The
transform is typically an instance of the `Bijector` class and the base
distribution is typically an instance of the `Distribution` class.
Shapes, type, and reparameterization are taken from the base distribution.
#### Mathematical details
Write `P(Y=y)` for the cumulative distribution function of random variable (rv)
`Y` and `p` for its density, i.e. its derivative with respect to `y`. Assume
that `Y=g(X)` where `g` is continuous and `X=g^{-1}(Y)`. Write `J` for the
Jacobian (of some function).
* `p(x)` - probability distribution for random variable X
* `p(y)` - probability distribution for random variable Y
* `f` - transform
* `g` - inverse transform, `g(f(x)) = x`
* `J(x)` - Jacobian of f(x)
A `TransformedDistribution` alters the input/outputs of a `Distribution`
associated with rv `X` in the following ways:
A Transformed Distribution exposes `sample` and `pdf`:
* `sample`:
* `sample`: `y = f(x)`, after drawing a sample of X.
* `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|`
Mathematically:
```
Y = g(X)
```
Programmatically:
```python
return bijector.forward(distribution.sample(...))
```
* `log_prob`:
Mathematically:
```
(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)
```
Programmatically:
```python
return (bijector.inverse_log_det_jacobian(x) +
distribution.log_prob(bijector.inverse(x)))
```
* `log_cdf`:
Mathematically:
```
(log o P o g^{-1})(y)
```
Programmatically:
```python
return distribution.log_cdf(bijector.inverse(x))
```
* and similarly for: `cdf`, `prob`, `log_survival_function`,
`survival_function`.
A simple example constructing a Log-Normal distribution from a Normal
distribution:
```python
logit_normal = TransformedDistribution(
base_dist_cls=tf.contrib.distributions.Normal,
mu=mu,
sigma=sigma,
transform=lambda x: tf.sigmoid(x),
inverse=lambda y: tf.log(y) - tf.log(1. - y),
log_det_jacobian=(lambda x:
tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1. - tf.sigmoid(x)),
reduction_indices=[-1])))
name="LogitNormalTransformedDistribution"
)
ds = tf.contrib.distributions
log_normal = ds.TransformedDistribution(
base_distribution=ds.Normal(mu=mu, sigma=sigma),
bijector=ds.bijector.Exp(),
name="LogNormalTransformedDistribution")
```
A `LogNormal` made from callables:
```python
ds = tf.contrib.distributions
log_normal = ds.TransformedDistribution(
base_distribution=ds.Normal(mu=mu, sigma=sigma),
bijector=ds.bijector.Inline(
forward_fn=tf.exp,
inverse_fn=tf.log,
inverse_log_det_jacobian_fn=(
lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
name="LogNormalTransformedDistribution")
```
Another example constructing a Normal from a StandardNormal:
```python
ds = tf.contrib.distributions
normal = ds.TransformedDistribution(
base_distribution=ds.Normal(mu=0, sigma=1),
bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0),
name="NormalTransformedDistribution")
```
"""
def __init__(self,
base_dist_cls,
transform,
inverse,
log_det_jacobian,
name="TransformedDistribution",
**base_dist_args):
base_distribution,
bijector,
name="TransformedDistribution"):
"""Construct a Transformed Distribution.
Args:
base_dist_cls: the base distribution class to transform. Must be a
subclass of `Distribution`.
transform: a callable that takes a `Tensor` sample from `base_dist` and
returns a `Tensor` of the same shape and type. `x => y`.
inverse: a callable that computes the inverse of transform. `y => x`. If
None, users can only call `log_pdf` on values returned by `sample`.
log_det_jacobian: a callable that takes a `Tensor` sample from `base_dist`
and returns the log of the determinant of the Jacobian of `transform`.
base_distribution: The base distribution class to transform. Typically an
instance of `Distribution`.
bijector: The object responsible for calculating the transformation.
Typically an instance of `Bijector`.
name: The name for the distribution.
**base_dist_args: kwargs to pass on to dist_cls on construction.
Raises:
TypeError: if `base_dist_cls` is not a subclass of
`Distribution`.
"""
with ops.name_scope(name, values=base_dist_args.values()) as ns:
self._base_dist = base_dist_cls(**base_dist_args)
self._transform = transform
self._inverse = inverse
self._log_det_jacobian = log_det_jacobian
with ops.name_scope(name) as ns:
self._base_distribution = base_distribution
self._bijector = bijector
self._inverse_cache = {}
super(TransformedDistribution, self).__init__(
dtype=self._base_dist.dtype,
parameters={"base_dist_cls": base_dist_cls,
"transform": transform,
"inverse": inverse,
"log_det_jacobian": log_det_jacobian,
"base_dist_args": base_dist_args},
is_continuous=self._base_dist.is_continuous,
is_reparameterized=self._base_dist.is_reparameterized,
validate_args=self._base_dist.validate_args,
allow_nan_stats=self._base_dist.allow_nan_stats,
dtype=self._base_distribution.dtype,
parameters={"base_distribution": base_distribution,
"bijector": bijector},
is_continuous=self._base_distribution.is_continuous,
is_reparameterized=self._base_distribution.is_reparameterized,
validate_args=self._base_distribution.validate_args,
allow_nan_stats=self._base_distribution.allow_nan_stats,
name=ns)
@property
def base_distribution(self):
"""Base distribution, p(x)."""
return self._base_dist
return self._base_distribution
@property
def transform(self):
def bijector(self):
"""Function transforming x => y."""
return self._transform
@property
def inverse(self):
"""Inverse function of transform, y => x."""
return self._inverse
@property
def log_det_jacobian(self):
"""Function computing the log determinant of the Jacobian of transform."""
return self._log_det_jacobian
return self._bijector
def _batch_shape(self):
return self.base_distribution.batch_shape()
@ -142,29 +174,27 @@ class TransformedDistribution(distribution.Distribution):
@distribution_util.AppendDocstring(
"""Samples from the base distribution and then passes through
the transform.""")
the bijector's forward transform.""")
def _sample_n(self, n, seed=None):
samples = self.base_distribution.sample_n(n=n, seed=seed)
with ops.name_scope("transform"):
transformed = self.transform(samples)
self._inverse_cache[transformed] = samples
return transformed
raw_samples = self.base_distribution.sample_n(n=n, seed=seed)
samples = self.bijector.forward(raw_samples)
self._inverse_cache[samples] = raw_samples
return samples
@distribution_util.AppendDocstring(
"""Implements `(log o p o g)(y) - (log o det o J o g)(y)`,
where `g` is the inverse of `transform`.
"""Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`,
where `g^{-1}` is the inverse of `transform`.
Also raises a `ValueError` if `inverse` was not provided to the
distribution and `y` was not returned from `sample`.""")
def _log_prob(self, y):
x = self._inverse_possibly_from_cache(y)
with ops.name_scope("log_det_jacobian"):
log_det_jacobian = self.log_det_jacobian(x)
return self.base_distribution.log_prob(x) - log_det_jacobian
inverse_log_det_jacobian = self.bijector.inverse_log_det_jacobian(y)
return self.base_distribution.log_prob(x) + inverse_log_det_jacobian
@distribution_util.AppendDocstring(
"""Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of
`transform`.
"""Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the
inverse of `transform`.
Also raises a `ValueError` if `inverse` was not provided to the
distribution and `y` was not returned from `sample`.""")
@ -172,8 +202,6 @@ class TransformedDistribution(distribution.Distribution):
return math_ops.exp(self._log_prob(y))
def _log_cdf(self, y):
# If Y = f(X),
# P[Y <= y] = P[f(X) <= y] = P[X <= f^{-1}(y)]
x = self._inverse_possibly_from_cache(y)
return self.base_distribution.log_cdf(x)
@ -192,12 +220,7 @@ class TransformedDistribution(distribution.Distribution):
def _inverse_possibly_from_cache(self, y):
"""Return `self._inverse(y)`, possibly using cached value."""
y = ops.convert_to_tensor(y, name="y")
with ops.name_scope("inverse"):
if y in self._inverse_cache:
x = self._inverse_cache[y]
elif self.inverse:
x = self.inverse(y)
else:
raise ValueError("No inverse function exists and input `y` was not "
"returned from `sample`.")
return x
if y in self._inverse_cache:
return self._inverse_cache[y]
else:
return self.bijector.inverse(y)
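The cache above is the whole trick that lets `log_prob(sample(...))` skip the
inverse computation; a hypothetical stand-alone version of the pattern (not
the TF implementation) for reference:
```python
import math

class CachingTransform(object):
  """forward() remembers y -> x, so inverse() is free for sampled values."""

  def __init__(self, forward_fn, inverse_fn):
    self._forward_fn = forward_fn
    self._inverse_fn = inverse_fn
    self._inverse_cache = {}

  def forward(self, x):
    y = self._forward_fn(x)
    self._inverse_cache[y] = x
    return y

  def inverse(self, y):
    # Hit the cache first; fall back to the possibly-expensive inverse.
    if y in self._inverse_cache:
      return self._inverse_cache[y]
    return self._inverse_fn(y)

t = CachingTransform(math.exp, math.log)
y = t.forward(1.5)
assert t.inverse(y) == 1.5  # served from the cache, no log() evaluated
```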

View File

@ -22,6 +22,7 @@ import math
import numpy as np
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky
from tensorflow.contrib.distributions.python.ops import operator_pd_full
from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util
@ -211,7 +212,8 @@ class _WishartOperatorPD(distribution.Distribution):
0.5 * self.df, self.dimension),
beta=0.5,
dtype=self.dtype,
seed=seed)
seed=distribution_util.gen_new_seed(
seed, "wishart"))
# Complexity: O(nbk^2)
x = array_ops.matrix_band_part(x, -1, 0) # Tri-lower.
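`gen_new_seed` lives in `distribution_util`; the idea is to derive a distinct,
deterministic seed per consumer from the user seed plus a salt, so two ops
seeded from the same source don't produce correlated streams. A sketch of one
plausible implementation (the real helper may differ in detail):
```python
import hashlib

def gen_new_seed_sketch(seed, salt):
  # None stays None so unseeded ops remain nondeterministic.
  if seed is None:
    return None
  digest = hashlib.md5((str(seed) + salt).encode("utf-8")).hexdigest()
  return int(digest[:8], 16) & 0x7FFFFFFF  # keep it a positive int32

print(gen_new_seed_sketch(42, "wishart"))  # stable across runs
```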

View File

@ -56,6 +56,7 @@ def add_notice_to_docstring(
def validate_callable(func, decorator_name):
if not hasattr(func, '__call__'):
raise ValueError(
'%s is not a function. If this is a property, '
'apply @%s before @property:\n\n@property\n@%s\ndef method(...)' % (
'%s is not a function. If this is a property, make sure'
' @property appears before @%s in your source code:'
'\n\n@property\n@%s\ndef method(...)' % (
func, decorator_name, decorator_name))
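The reworded message is easier to act on with the ordering spelled out; using
the `deprecated` decorator from `tensorflow.contrib.framework`, the accepted
arrangement is:
```python
from tensorflow.contrib.framework import deprecated


class Model(object):

  def __init__(self):
    self._value = 0

  @property  # @property first in source order,
  @deprecated("2016-11-12", "Use `new_value` instead.")  # then the deprecation.
  def value(self):
    return self._value
```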

View File

@ -245,11 +245,10 @@ class DeprecationTest(tf.test.TestCase):
self.assertRegexpMatches(args[0], r"deprecated and will be removed after")
self._assert_subset(set([date, instructions]), set(args[1:]))
@tf.test.mock.patch.object(logging, "warning", autospec=True)
def test_prop_wrong_order(self, mock_warning):
def test_prop_wrong_order(self):
with self.assertRaisesRegexp(
ValueError, "apply @deprecated before @property"):
ValueError,
"make sure @property appears before @deprecated in your source code"):
# pylint: disable=unused-variable
class _Object(object):
@ -357,8 +356,7 @@ class DeprecatedArgsTest(tf.test.TestCase):
with self.assertRaisesRegexp(ValueError, "argument"):
deprecation.deprecated_args(date, instructions)
@tf.test.mock.patch.object(logging, "warning", autospec=True)
def test_deprecated_missing_args(self, mock_warning):
def test_deprecated_missing_args(self):
date = "2016-07-04"
instructions = "This is how you update..."

View File

@ -68,6 +68,7 @@ class SparseTensorColumn : public ColumnInterface<InternalType> {
return feature_counts_[batch];
}
// InternalType is int64 only when using HashCrosser.
int64 DoFeature(int64 batch, int64 n, int64 not_used) const {
const int64 start = feature_start_indices_[batch];
if (DT_STRING == values_.dtype())
@ -75,6 +76,7 @@ class SparseTensorColumn : public ColumnInterface<InternalType> {
return values_.vec<int64>().data()[start + n];
}
// InternalType is string or StringPiece when using StringCrosser.
string DoFeature(int64 batch, int64 n, string not_used) const {
const int64 start = feature_start_indices_[batch];
if (DT_STRING == values_.dtype())
@ -103,12 +105,14 @@ class DenseTensorColumn : public ColumnInterface<InternalType> {
int64 FeatureCount(int64 batch) const override { return tensor_.dim_size(1); }
// InternalType is int64 only when using HashCrosser.
int64 DoFeature(int64 batch, int64 n, int64 not_used) const {
if (DT_STRING == tensor_.dtype())
return Fingerprint64(tensor_.matrix<string>()(batch, n));
return tensor_.matrix<int64>()(batch, n);
}
// Internal type is string or StringPiece when using StringCrosser.
string DoFeature(int64 batch, int64 n, string not_used) const {
if (DT_STRING == tensor_.dtype()) return tensor_.matrix<string>()(batch, n);
return std::to_string(tensor_.matrix<int64>()(batch, n));
@ -158,7 +162,7 @@ class StringCrosser {
public:
StringCrosser(const std::vector<
std::unique_ptr<ColumnInterface<InternalType>>>& columns,
const int64 not_used)
const int64 num_buckets_unused, const uint64 hash_key_unused)
: columns_(columns) {}
string Generate(const int64 batch_index,
@ -178,32 +182,62 @@ class StringCrosser {
const std::vector<std::unique_ptr<ColumnInterface<InternalType>>>& columns_;
};
// Seed is chosen based on third_party/tensorflow/core/lib/hash/hash.h
const int64 kInitialHashSeed = 0xDECAFCAFFE;
int64 HashCombine(int64 a, int64 b) {
return a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4));
}
// Generates the sparse crosses as nested hash to avoid string manipulations.
class HashCrosser {
public:
HashCrosser(
const std::vector<std::unique_ptr<ColumnInterface<int64>>>& columns,
const int64 num_buckets)
const int64 num_buckets, const uint64 hash_key_unused)
: columns_(columns), num_buckets_(num_buckets) {}
int64 Generate(const int64 batch_index,
const std::vector<int>& permutation) const {
// Seed is chosen based on third_party/tensorflow/core/lib/hash/hash.h
static const int64 kInitialHashSeed = 0xDECAFCAFFE;
uint64 hashed_output = kInitialHashSeed;
for (int i = 0; i < permutation.size(); i++) {
for (size_t i = 0; i < permutation.size(); ++i) {
int64 hash_i = columns_[i]->Feature(batch_index, permutation[i]);
hashed_output = HashCombine(hashed_output, hash_i);
}
if (num_buckets_ > 0) {
return hashed_output % num_buckets_;
} else {
// To perevent negative output we take module to max int64.
// To prevent negative output we take modulo to max int64.
return hashed_output % std::numeric_limits<int64>::max();
}
}
private:
static int64 HashCombine(int64 a, int64 b) {
return a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4));
}
const std::vector<std::unique_ptr<ColumnInterface<int64>>>& columns_;
const int64 num_buckets_;
};
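For experimentation outside the kernel, `HashCombine` transcribes directly to
Python once uint64 wraparound is made explicit (constant and shifts copied
from above; the C++ signed `int64` subtleties are glossed over):
```python
MASK64 = (1 << 64) - 1
INITIAL_HASH_SEED = 0xDECAFCAFFE

def hash_combine(a, b):
  # Mirrors HashCombine: a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)).
  return (a ^ ((b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)) & MASK64)) & MASK64

def cross_hash_v1(features, num_buckets):
  h = INITIAL_HASH_SEED
  for f in features:
    h = hash_combine(h, f)
  return h % num_buckets if num_buckets > 0 else h % ((1 << 63) - 1)

print(cross_hash_v1([3, 1, 4], num_buckets=100))
```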
// Generates the sparse crosses as nested hash to avoid string manipulations.
class HashCrosserV2 {
public:
HashCrosserV2(
const std::vector<std::unique_ptr<ColumnInterface<int64>>>& columns,
const int64 num_buckets, const uint64 hash_key)
: columns_(columns), num_buckets_(num_buckets), hash_key_(hash_key) {}
int64 Generate(const int64 batch_index,
const std::vector<int>& permutation) const {
// Do the fingerprint concatenation on uint64.
uint64 hashed_output = hash_key_;
for (size_t i = 0; i < permutation.size(); ++i) {
uint64 hash_i = columns_[i]->Feature(batch_index, permutation[i]);
hashed_output = FingerprintCat64(hashed_output, hash_i);
}
// The return value is int64 based on the number of buckets.
if (num_buckets_ > 0) {
return hashed_output % num_buckets_;
} else {
// To prevent negative output we take modulo to max int64.
return hashed_output % std::numeric_limits<int64>::max();
}
}
@ -211,6 +245,7 @@ class HashCrosser {
private:
const std::vector<std::unique_ptr<ColumnInterface<int64>>>& columns_;
const int64 num_buckets_;
const uint64 hash_key_;
};
// ProductIterator generates cartesian products based on indices.
@ -262,28 +297,41 @@ class ProductIterator {
std::vector<int> next_permutation_;
};
template <bool HASHED_OUTPUT, typename InternalType>
template <bool HASHED_OUTPUT, typename InternalType, bool VERSION_2>
struct CrossTraits;
template <typename InternalType>
struct CrossTraits<false, InternalType> {
template <typename InternalType, bool VERSION_2>
struct CrossTraits<false, InternalType, VERSION_2> {
typedef StringCrosser<InternalType> Crosser;
typedef OutputUpdater<string> Updater;
};
template <>
struct CrossTraits<true, int64> {
struct CrossTraits<true, int64, false> {
typedef HashCrosser Crosser;
typedef OutputUpdater<int64> Updater;
};
template <>
struct CrossTraits<true, int64, true> {
typedef HashCrosserV2 Crosser;
typedef OutputUpdater<int64> Updater;
};
} // namespace
template <bool HASHED_OUTPUT, typename InternalType>
template <bool HASHED_OUTPUT, typename InternalType, bool VERSION_2>
class SparseFeatureCrossOp : public OpKernel {
public:
explicit SparseFeatureCrossOp(OpKernelConstruction* context)
: OpKernel(context) {
OP_REQUIRES_OK(context, context->GetAttr("num_buckets", &num_buckets_));
if (VERSION_2) {
// Read signed_hash_key_ as int64 since uint64 attributes are not
// supported by REGISTER_OP.
int64 signed_hash_key_;
OP_REQUIRES_OK(context, context->GetAttr("hash_key", &signed_hash_key_));
hash_key_ = static_cast<uint64>(signed_hash_key_);
}
}
void Compute(OpKernelContext* context) override {
@ -303,8 +351,8 @@ class SparseFeatureCrossOp : public OpKernel {
GenerateColumnsFromInput(indices_list_in, values_list_in,
shapes_list_in, dense_list_in);
typename CrossTraits<HASHED_OUTPUT, InternalType>::Crosser crosser(
columns, num_buckets_);
typename CrossTraits<HASHED_OUTPUT, InternalType, VERSION_2>::Crosser
crosser(columns, num_buckets_, hash_key_);
Tensor* indices_out;
Tensor* values_out;
Tensor* shape_out;
@ -313,8 +361,8 @@ class SparseFeatureCrossOp : public OpKernel {
CreateOutputTensors(columns, batch_size, context, &indices_out, &values_out,
&shape_out, &output_start_indices);
typename CrossTraits<HASHED_OUTPUT, InternalType>::Updater updater(
output_start_indices, indices_out, values_out);
typename CrossTraits<HASHED_OUTPUT, InternalType, VERSION_2>::Updater
updater(output_start_indices, indices_out, values_out);
auto do_work = [this, &columns, crosser, updater](int64 begin, int64 end) {
for (int b = begin; b < end; b++) {
ProductIterator<InternalType> product_iterator(columns, b);
@ -459,7 +507,7 @@ class SparseFeatureCrossOp : public OpKernel {
return columns;
}
// Extrats data about the features and populates feature data.
// Extracts data about the features and populates feature data.
void ExtractFeatureData(
const OpInputList& indices_list_in, int64 batch_size,
std::vector<std::vector<int64>>* feature_counts,
@ -536,30 +584,57 @@ class SparseFeatureCrossOp : public OpKernel {
return cross_count;
}
int64 num_buckets_;
uint64 hash_key_;
};
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross")
.Device(DEVICE_CPU)
.TypeConstraint<string>("out_type")
.TypeConstraint<string>("internal_type"),
SparseFeatureCrossOp<false, StringPiece>);
SparseFeatureCrossOp<false, StringPiece, false>);
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross")
.Device(DEVICE_CPU)
.TypeConstraint<string>("out_type")
.TypeConstraint<int64>("internal_type"),
SparseFeatureCrossOp<false, string>);
SparseFeatureCrossOp<false, string, false>);
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross")
.Device(DEVICE_CPU)
.TypeConstraint<int64>("out_type")
.TypeConstraint<string>("internal_type"),
SparseFeatureCrossOp<true, int64>);
SparseFeatureCrossOp<true, int64, false>);
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross")
.Device(DEVICE_CPU)
.TypeConstraint<int64>("out_type")
.TypeConstraint<int64>("internal_type"),
SparseFeatureCrossOp<true, int64>);
SparseFeatureCrossOp<true, int64, false>);
// The following builders enable FingerprintCat64 concatenation for the
// crossed features.
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2")
.Device(DEVICE_CPU)
.TypeConstraint<string>("out_type")
.TypeConstraint<string>("internal_type"),
SparseFeatureCrossOp<false, StringPiece, true>);
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2")
.Device(DEVICE_CPU)
.TypeConstraint<string>("out_type")
.TypeConstraint<int64>("internal_type"),
SparseFeatureCrossOp<false, string, true>);
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2")
.Device(DEVICE_CPU)
.TypeConstraint<int64>("out_type")
.TypeConstraint<string>("internal_type"),
SparseFeatureCrossOp<true, int64, true>);
REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2")
.Device(DEVICE_CPU)
.TypeConstraint<int64>("out_type")
.TypeConstraint<int64>("internal_type"),
SparseFeatureCrossOp<true, int64, true>);
} // namespace tensorflow
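The `CrossTraits<HASHED_OUTPUT, InternalType, VERSION_2>` machinery above is
compile-time dispatch; conceptually it is just a table from the three flags to
a crosser implementation, which the kernel registrations then enumerate. A
Python rendering of that table (class names as in the kernel):
```python
# (hashed_output, internal_type, version_2) -> crosser used by the kernel.
CROSSER_BY_TRAITS = {
    (False, "StringPiece", False): "StringCrosser",
    (False, "string", False): "StringCrosser",
    (False, "StringPiece", True): "StringCrosser",  # V2 flag ignored for strings
    (False, "string", True): "StringCrosser",
    (True, "int64", False): "HashCrosser",          # legacy HashCombine path
    (True, "int64", True): "HashCrosserV2",         # FingerprintCat64 path
}
```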

View File

@ -68,9 +68,87 @@ then the output will be
if hashed_output=true then the output will be
shape = [2, 2]
[0, 0]: Hash64("f", Hash64("d", Hash64("a")))
[1, 0]: Hash64("g", Hash64("e", Hash64("b")))
[1, 1]: Hash64("g", Hash64("e", Hash64("c")))
[0, 0]: HashCombine(
Fingerprint64("f"), HashCombine(
Fingerprint64("d"), Fingerprint64("a")))
[1, 0]: HashCombine(
Fingerprint64("g"), HashCombine(
Fingerprint64("e"), Fingerprint64("b")))
[1, 1]: HashCombine(
Fingerprint64("g"), HashCombine(
Fingerprint64("e"), Fingerprint64("c")))
indices: 2-D. Indices of each input `SparseTensor`.
values: 1-D. values of each `SparseTensor`.
shapes: 1-D. Shapes of each `SparseTensor`.
dense: 2-D. Columns represented by dense `Tensor`.
output_indices: 2-D. Indices of the concatenated `SparseTensor`.
output_values: 1-D. Non-empty values of the concatenated or hashed
`SparseTensor`.
output_shape: 1-D. Shape of the concatenated `SparseTensor`.
)doc");
REGISTER_OP("SparseFeatureCrossV2")
.Input("indices: N * int64")
.Input("values: sparse_types")
.Input("shapes: N * int64")
.Input("dense: dense_types")
.Output("output_indices: int64")
.Output("output_values: out_type")
.Output("output_shape: int64")
.Attr("N: int >= 0")
.Attr("hashed_output: bool")
.Attr("num_buckets: int >= 0")
.Attr("hash_key: int")
.Attr("sparse_types: list({int64, string}) >= 0")
.Attr("dense_types: list({int64, string}) >= 0")
.Attr("out_type: {int64, string}")
.Attr("internal_type: {int64, string}")
.SetShapeFn([](shape_inference::InferenceContext* c) {
c->set_output(0, c->Matrix(c->UnknownDim(), 2));
c->set_output(1, c->Vector(c->UnknownDim()));
c->set_output(2, c->Vector(2));
return Status::OK();
})
.Doc(R"doc(
Generates sparse cross from a list of sparse tensors.
The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each
representing features of one feature column. It outputs a 2D `SparseTensor` with
the batchwise crosses of these features.
For example, if the inputs are
inputs[0]: SparseTensor with shape = [2, 2]
[0, 0]: "a"
[1, 0]: "b"
[1, 1]: "c"
inputs[1]: SparseTensor with shape = [2, 1]
[0, 0]: "d"
[1, 0]: "e"
inputs[2]: Tensor [["f"], ["g"]]
then the output will be
shape = [2, 2]
[0, 0]: "a_X_d_X_f"
[1, 0]: "b_X_e_X_g"
[1, 1]: "c_X_e_X_g"
if hashed_output=true then the output will be
shape = [2, 2]
[0, 0]: FingerprintCat64(
Fingerprint64("f"), FingerprintCat64(
Fingerprint64("d"), Fingerprint64("a")))
[1, 0]: FingerprintCat64(
Fingerprint64("g"), FingerprintCat64(
Fingerprint64("e"), Fingerprint64("b")))
[1, 1]: FingerprintCat64(
Fingerprint64("g"), FingerprintCat64(
Fingerprint64("e"), Fingerprint64("c")))
indices: 2-D. Indices of each input `SparseTensor`.
values: 1-D. values of each `SparseTensor`.
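The nested rendering above corresponds to the kernel's loop
`hashed_output = FingerprintCat64(hashed_output, hash_i)`, i.e. a left fold
over the columns seeded with `hash_key`; `fingerprint64` and
`fingerprint_cat64` below are stand-ins for the real hash functions:
```python
def cross_fingerprint(column_values, hash_key, fingerprint64, fingerprint_cat64):
  # E.g. for ["a", "d", "f"]: fold hash_key with Fingerprint64 of each value,
  # in column order, as HashCrosserV2::Generate does.
  h = hash_key
  for v in column_values:
    h = fingerprint_cat64(h, fingerprint64(v))
  return h
```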

View File

@ -17,6 +17,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy
import tensorflow as tf
@ -253,10 +254,13 @@ class SparseCrossOpTest(tf.test.TestCase):
Cross for the corresponding batch should be empty.
"""
op = tf.contrib.layers.sparse_feature_cross([
self._sparse_tensor(
[['batch1-FC1-F1', 'batch1-FC1-F2']], 2), self._sparse_tensor(
[['batch1-FC2-F1'], ['batch2-FC2-F1']], 2), self._sparse_tensor(
[['batch1-FC3-F1', 'batch1-FC3-F2']], 2)
self._sparse_tensor([
['batch1-FC1-F1', 'batch1-FC1-F2']
], 2), self._sparse_tensor([
['batch1-FC2-F1'], ['batch2-FC2-F1']
], 2), self._sparse_tensor([
['batch1-FC3-F1', 'batch1-FC3-F2']
], 2)
])
expected_out = self._sparse_tensor([[
'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
@ -298,6 +302,26 @@ class SparseCrossOpTest(tf.test.TestCase):
with self.test_session() as sess:
self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_hashed_output_zero_bucket_v2(self):
"""Tests a simple scenario.
"""
op = tf.contrib.layers.sparse_feature_cross(
[
self._sparse_tensor([
['batch1-FC1-F1']
]), self._sparse_tensor([
['batch1-FC2-F1']
]), self._sparse_tensor([
['batch1-FC3-F1']
])
],
hashed_output=True,
hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
# Check actual hashed output to prevent unintentional hashing changes.
expected_out = self._sparse_tensor([[1971693436396284976]])
with self.test_session() as sess:
self._assert_sparse_tensor_equals(expected_out, sess.run(op))
# TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed.
def test_hashed_output(self):
"""Tests a simple scenario.
@ -319,6 +343,56 @@ class SparseCrossOpTest(tf.test.TestCase):
with self.test_session() as sess:
self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_hashed_output_v2(self):
"""Tests a simple scenario.
"""
op = tf.contrib.layers.sparse_feature_cross(
[
self._sparse_tensor([
['batch1-FC1-F1']
]), self._sparse_tensor([
['batch1-FC2-F1']
]), self._sparse_tensor([
['batch1-FC3-F1']
])
],
hashed_output=True,
num_buckets=100,
hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
# Check actual hashed output to prevent unintentional hashing changes.
expected_out = self._sparse_tensor([[83]])
with self.test_session() as sess:
self._assert_sparse_tensor_equals(expected_out, sess.run(op))
def test_hashed_output_v1_has_collision(self):
"""Tests the old version of the fingerprint concatenation has collisions.
"""
# The last 10 bits of 359 and 1024+359 are identical.
# As a result, all the crosses collide.
t1 = tf.constant([[359], [359 + 1024]])
t2 = tf.constant([list(range(10)), list(range(10))])
cross = tf.contrib.layers.sparse_feature_cross(
[t2, t1], hashed_output=True, num_buckets=1024)
cross_dense = tf.sparse_tensor_to_dense(cross)
with tf.Session():
values = cross_dense.eval()
self.assertTrue(numpy.equal(values[0], values[1]).all())
def test_hashed_output_v2_has_no_collision(self):
"""Tests the new version of the fingerprint concatenation has no collisions.
"""
# Although the last 10 bits of 359 and 1024+359 are identical.
# As a result, all the crosses shouldn't collide.
t1 = tf.constant([[359], [359 + 1024]])
t2 = tf.constant([list(range(10)), list(range(10))])
cross = tf.contrib.layers.sparse_feature_cross(
[t2, t1], hashed_output=True, num_buckets=1024,
hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
cross_dense = tf.sparse_tensor_to_dense(cross)
with tf.Session():
values = cross_dense.eval()
self.assertTrue(numpy.not_equal(values[0], values[1]).all())
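The two tests hinge on a concrete weakness of the legacy combine: for a fixed
accumulator `a`, the low 10 bits of
`a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4))` depend only on
`b mod 1024` (the `a << 10` term can't reach them), so two feature values that
agree modulo 1024 always produce crosses that agree modulo 1024. A quick
standalone check of that property:
```python
MASK64 = (1 << 64) - 1

def hash_combine(a, b):
  return (a ^ ((b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)) & MASK64)) & MASK64

seed = 0xDECAFCAFFE
for other in range(10):
  h1 = hash_combine(hash_combine(seed, other), 359)
  h2 = hash_combine(hash_combine(seed, other), 359 + 1024)
  assert h1 % 1024 == h2 % 1024  # every cross collides in 1024 buckets
```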
def test_hashed_3x1x2(self):
"""Tests 3x1x2 permutation with hashed output.
"""

View File

@ -170,7 +170,8 @@ def _prune_invalid_ids(sparse_ids, sparse_weights):
return sparse_ids, sparse_weights
def hashed_embedding_lookup(params, values, dimension, name=None):
def hashed_embedding_lookup(params, values, dimension, name=None,
hash_key=None):
"""Looks up embeddings using parameter hashing for each value in `values`.
The i-th embedding component of a value v in `values` is found by retrieving
@ -200,6 +201,9 @@ def hashed_embedding_lookup(params, values, dimension, name=None):
values: `Tensor` of values to be embedded.
dimension: Embedding dimension
name: An optional name for this op.
hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
function to combine the fingerprints of the cross in SparseFeatureCrossOp
(optional).
Returns:
A tensor with shape [d0, ..., dn, dimension]
@ -243,7 +247,8 @@ def hashed_embedding_lookup(params, values, dimension, name=None):
tensors_to_cross = [array_ops.tile(array_ops.expand_dims(
math_ops.range(0, dimension), 0), array_ops.shape(values)), values]
ids = sparse_feature_cross_op.sparse_feature_cross(
tensors_to_cross, hashed_output=True, num_buckets=num_params)
tensors_to_cross, hashed_output=True, num_buckets=num_params,
hash_key=hash_key)
ids = sparse_ops.sparse_tensor_to_dense(ids)
# No need to validate the indices since we have checked the params
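The cross of `[range(dimension), values]` implements parameter hashing:
component `i` of the embedding for value `v` is `params[hash(i, v) % num_params]`,
so the table size is decoupled from the vocabulary. A toy version (Python's
built-in `hash` stands in for the deterministic fingerprint the op uses):
```python
def hashed_embedding_lookup_sketch(params, values, dimension):
  # params: flat list of scalar weights shared across all features.
  return [[params[hash((i, v)) % len(params)] for i in range(dimension)]
          for v in values]

weights = [0.1, 0.2, 0.3, 0.4, 0.5]
print(hashed_embedding_lookup_sketch(weights, ["cat", "dog"], dimension=3))
```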
@ -260,7 +265,8 @@ def hashed_embedding_lookup_sparse(params,
dimension,
combiner=None,
default_value=None,
name=None):
name=None,
hash_key=None):
"""Looks up embeddings of a sparse feature using parameter hashing.
See `tf.contrib.layers.hashed_embedding_lookup` for embedding with hashing.
@ -276,6 +282,9 @@ def hashed_embedding_lookup_sparse(params,
the default.
default_value: The value to use for an entry with no features.
name: An optional name for this op.
hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
function to combine the fingerprints of the cross in SparseFeatureCrossOp
(optional).
Returns:
Dense tensor with shape [N, dimension] with N the number of rows in
@ -315,7 +324,8 @@ def hashed_embedding_lookup_sparse(params,
values = sparse_values.values
values, idx = array_ops.unique(values)
embeddings = hashed_embedding_lookup(params, values, dimension)
embeddings = hashed_embedding_lookup(params, values, dimension,
hash_key=hash_key)
if combiner == "sum":
embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids,

View File

@ -1476,6 +1476,7 @@ def bucketized_column(source_column, boundaries):
class _CrossedColumn(_FeatureColumn,
collections.namedtuple("_CrossedColumn",
["columns", "hash_bucket_size",
"hash_key",
"combiner", "ckpt_to_load_from",
"tensor_name_in_ckpt"])):
"""Represents a cross transformation also known as conjuction or combination.
@ -1536,6 +1537,7 @@ class _CrossedColumn(_FeatureColumn,
def __new__(cls,
columns,
hash_bucket_size,
hash_key,
combiner="sqrtn",
ckpt_to_load_from=None,
tensor_name_in_ckpt=None):
@ -1560,7 +1562,8 @@ class _CrossedColumn(_FeatureColumn,
sorted_columns = sorted(
[column for column in columns], key=lambda column: column.name)
return super(_CrossedColumn, cls).__new__(cls, tuple(sorted_columns),
hash_bucket_size, combiner,
hash_bucket_size, hash_key,
combiner,
ckpt_to_load_from,
tensor_name_in_ckpt)
@ -1623,6 +1626,7 @@ class _CrossedColumn(_FeatureColumn,
feature_tensors,
hashed_output=True,
num_buckets=self.hash_bucket_size,
hash_key=self.hash_key,
name="cross")
# pylint: disable=unused-argument
@ -1650,7 +1654,8 @@ class _CrossedColumn(_FeatureColumn,
def crossed_column(columns, hash_bucket_size, combiner=None,
ckpt_to_load_from=None,
tensor_name_in_ckpt=None):
tensor_name_in_ckpt=None,
hash_key=None):
"""Creates a _CrossedColumn.
Args:
@ -1664,6 +1669,9 @@ def crossed_column(columns, hash_bucket_size, combiner=None,
tensor_name_in_ckpt: (Optional). Name of the `Tensor` in the provided
checkpoint from which to restore the column weights. Required if
`ckpt_to_load_from` is not None.
hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
function to combine the fingerprints of the cross in SparseFeatureCrossOp
(optional).
Returns:
A _CrossedColumn.
@ -1682,6 +1690,7 @@ def crossed_column(columns, hash_bucket_size, combiner=None,
return _CrossedColumn(
columns,
hash_bucket_size,
hash_key,
combiner=combiner,
ckpt_to_load_from=ckpt_to_load_from,
tensor_name_in_ckpt=tensor_name_in_ckpt)
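With `hash_key` threaded through `_CrossedColumn`, a caller opting in to the
fingerprint-based crossing would look roughly like this (feature names
hypothetical):
```python
import tensorflow as tf

country = tf.contrib.layers.sparse_column_with_hash_bucket(
    "country", hash_bucket_size=100)
language = tf.contrib.layers.sparse_column_with_hash_bucket(
    "language", hash_bucket_size=100)

country_x_language = tf.contrib.layers.crossed_column(
    [country, language],
    hash_bucket_size=10000,
    hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
```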

View File

@ -128,7 +128,6 @@ def _embeddings_from_arguments(column,
embeddings,
input_tensor,
sparse_weights=weight_tensor,
default_id=0,
combiner=args.combiner,
name=column.name + 'weights')
@ -214,10 +213,8 @@ def input_from_feature_columns(columns_to_tensors,
age_buckets = bucketized_column(
source_column=age,
boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
occupation_x_age = crossed_column(columns=[occupation, age_buckets],
hash_bucket_size=10000)
feature_columns=[occupation_emb, occupation_x_age]
feature_columns=[occupation_emb, age_buckets]
Args:
columns_to_tensors: A mapping from feature column to tensors. 'string' key
@ -328,7 +325,6 @@ def _create_embedding_lookup(column,
variable,
embedding_lookup_arguments.input_tensor,
sparse_weights=embedding_lookup_arguments.weight_tensor,
default_id=0,
combiner=embedding_lookup_arguments.combiner,
name=column.name + '_weights')
return variable, predictions
@ -387,7 +383,6 @@ def _create_joint_embedding_lookup(columns_to_tensors,
variable,
sparse_tensor,
sparse_weights=None,
default_id=0,
combiner='sum',
name='_weights')
return variable, predictions
@ -488,8 +483,6 @@ def weighted_sum_from_feature_columns(columns_to_tensors,
occupation = sparse_column_with_hash_bucket(column_name="occupation",
hash_bucket_size=1000)
occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
combiner="sum")
age = real_valued_column("age")
age_buckets = bucketized_column(
source_column=age,
@ -497,7 +490,7 @@ def weighted_sum_from_feature_columns(columns_to_tensors,
occupation_x_age = crossed_column(columns=[occupation, age_buckets],
hash_bucket_size=10000)
feature_columns=[occupation_emb, occupation_x_age]
feature_columns=[age_buckets, occupation, occupation_x_age]
Args:
columns_to_tensors: A mapping from feature column to tensors. 'string' key

View File

@ -644,7 +644,7 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase):
hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10)
wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
shape=[3, 2])
features = {"wire": wire_tensor}
embeded_sparse = tf.contrib.layers.embedding_column(
hashed_sparse, 1, combiner="sum", initializer=init_ops.ones_initializer)
@ -653,18 +653,18 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase):
with self.test_session():
tf.initialize_all_variables().run()
# score: (number of values)
self.assertAllEqual(output.eval(), [[1.], [2.]])
self.assertAllEqual(output.eval(), [[1.], [2.], [0.]])
def testEmbeddingColumnWithWeightedSparseColumnForDNN(self):
ids = tf.contrib.layers.sparse_column_with_keys(
"ids", ["marlo", "omar", "stringer"])
ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
shape=[3, 2])
weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights")
weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0],
indices=[[0, 0], [1, 0], [1, 1]],
shape=[2, 2])
shape=[3, 2])
features = {"ids": ids_tensor,
"weights": weights_tensor}
embeded_sparse = tf.contrib.layers.embedding_column(
@ -675,7 +675,7 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase):
tf.initialize_all_variables().run()
tf.initialize_all_tables().run()
# score: (sum of weights)
self.assertAllEqual(output.eval(), [[10.], [50.]])
self.assertAllEqual(output.eval(), [[10.], [50.], [0.]])
def testInputLayerWithCollectionsForDNN(self):
real_valued = tf.contrib.layers.real_valued_column("price")
@ -960,7 +960,7 @@ class SequenceInputFromFeatureColumnTest(tf.test.TestCase):
# `ids_tensor` consists of 7 instances of <empty>, 3 occurrences of "b",
# 2 occurrences of "c" and 1 instance of "a".
expected_gradient_values = sorted([7., 3., 2., 1.] * embedding_dimension)
expected_gradient_values = sorted([0., 3., 2., 1.] * embedding_dimension)
actual_gradient_values = np.sort(gradients[0].values, axis=None)
self.assertAllClose(expected_gradient_values, actual_gradient_values)

View File

@ -22,6 +22,7 @@ import six
from tensorflow.contrib import losses
from tensorflow.contrib import metrics as metrics_lib
from tensorflow.contrib.framework import deprecated
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
@ -30,6 +31,11 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
@deprecated(
"2016-11-12",
"This file will be removed after the deprecation date."
"Please switch to "
"third_party/tensorflow/contrib/learn/python/learn/estimators/head.py")
def regression_target(label_name=None,
weight_column_name=None,
target_dimension=1):
@ -54,6 +60,11 @@ def regression_target(label_name=None,
# TODO(zakaria): Add logistic_regression_target
@deprecated(
"2016-11-12",
"This file will be removed after the deprecation date."
"Please switch to "
"third_party/tensorflow/contrib/learn/python/learn/estimators/head.py")
def multi_class_target(n_classes, label_name=None, weight_column_name=None):
"""Creates a _TargetColumn for multi class single label classification.
@ -85,6 +96,11 @@ def multi_class_target(n_classes, label_name=None, weight_column_name=None):
weight_column_name=weight_column_name)
@deprecated(
"2016-11-12",
"This file will be removed after the deprecation date."
"Please switch to "
"third_party/tensorflow/contrib/learn/python/learn/estimators/head.py")
def binary_svm_target(label_name=None, weight_column_name=None):
"""Creates a _TargetColumn for binary classification with SVMs.
@ -105,6 +121,11 @@ def binary_svm_target(label_name=None, weight_column_name=None):
weight_column_name=weight_column_name)
@deprecated(
"2016-11-12",
"This file will be removed after the deprecation date."
"Please switch to "
"third_party/tensorflow/contrib/learn/python/learn/estimators/head.py")
class ProblemType(object):
UNSPECIFIED = 0
CLASSIFICATION = 1
@ -391,7 +412,6 @@ def _log_loss_with_two_classes(logits, target):
def _softmax_cross_entropy_loss(logits, target):
# sigmoid_cross_entropy_with_logits requires [batch_size, 1] target.
# Check that we got int32/int64 for classification.
if (not target.dtype.is_compatible_with(dtypes.int64) and
not target.dtype.is_compatible_with(dtypes.int32)):
@ -416,6 +436,11 @@ def _run_metrics(predictions, targets, metrics, weights):
return result
@deprecated(
"2016-11-12",
"This file will be removed after the deprecation date."
"Please switch to "
"third_party/tensorflow/contrib/learn/python/learn/estimators/head.py")
def get_default_binary_metrics_for_eval(thresholds):
"""Returns a dictionary of basic metrics for logistic regression.

View File

@ -17,6 +17,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.framework import deprecated_arg_values
from tensorflow.python.framework import common_shapes
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import load_library
@ -28,9 +30,21 @@ _sparse_feature_cross_op = load_library.load_op_library(
resource_loader.get_path_to_datafile("_sparse_feature_cross_op.so"))
assert _sparse_feature_cross_op, "Could not load _sparse_feature_cross_op.so."
# Default hash key for the FingerprintCat64.
SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY = 0xDECAFCAFFE
@deprecated_arg_values(
"2016-11-20",
"The default behavior of sparse_feature_cross is changing, the default\n"
"value for hash_key will change to SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY.\n"
"From that point on sparse_feature_cross will always use FingerprintCat64\n"
"to concatenate the feature fingerprints. And the underlying\n"
"_sparse_feature_cross_op.sparse_feature_cross operation will be marked\n"
"as deprecated.",
hash_key=None)
def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0,
name=None):
name=None, hash_key=None):
"""Crosses a list of Tensor or SparseTensor objects.
See sparse_feature_cross_kernel.cc for more details.
@ -42,6 +56,10 @@ def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0,
num_buckets: It is used if hashed_output is true.
output = hashed_value%num_buckets if num_buckets > 0 else hashed_value.
name: A name prefix for the returned tensors (optional).
hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
function to combine the fingerprints of the cross in SparseFeatureCrossOp.
The default value is None, but it will become
SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY after 2016-11-20 (optional).
Returns:
A `SparseTensor` with the crossed features.
@ -74,18 +92,36 @@ def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0,
dense_inputs[i] = math_ops.to_int64(dense_inputs[i])
internal_type = dtypes.int64
indices_out, values_out, shape_out = (
_sparse_feature_cross_op.sparse_feature_cross(indices,
values,
shapes,
dense_inputs,
hashed_output,
num_buckets,
out_type=out_type,
internal_type=internal_type,
name=name))
if hash_key:
indices_out, values_out, shape_out = (
_sparse_feature_cross_op.sparse_feature_cross_v2(
indices,
values,
shapes,
dense_inputs,
hashed_output,
num_buckets,
hash_key=hash_key,
out_type=out_type,
internal_type=internal_type,
name=name))
else:
indices_out, values_out, shape_out = (
_sparse_feature_cross_op.sparse_feature_cross(
indices,
values,
shapes,
dense_inputs,
hashed_output,
num_buckets,
out_type=out_type,
internal_type=internal_type,
name=name))
return ops.SparseTensor(indices_out, values_out, shape_out)
ops.RegisterShape("SparseFeatureCross")(common_shapes.call_cpp_shape_fn)
ops.NotDifferentiable("SparseFeatureCross")
ops.RegisterShape("SparseFeatureCrossV2")(common_shapes.call_cpp_shape_fn)
ops.NotDifferentiable("SparseFeatureCrossV2")

View File

@ -27,19 +27,7 @@ py_library(
py_test(
name = "base_test",
size = "medium",
srcs = ["python/learn/tests/base_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
"//tensorflow:tensorflow_py",
"//tensorflow/python:framework_test_lib",
],
)
py_test(
name = "load_csv_test",
size = "small",
srcs = ["python/learn/tests/load_csv_test.py"],
srcs = ["python/learn/estimators/base_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -51,7 +39,7 @@ py_test(
py_test(
name = "data_feeder_test",
size = "small",
srcs = ["python/learn/tests/data_feeder_test.py"],
srcs = ["python/learn/learn_io/data_feeder_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -274,7 +262,7 @@ py_test(
py_test(
name = "estimators_test",
size = "small",
srcs = ["python/learn/tests/estimators_test.py"],
srcs = ["python/learn/estimators/estimators_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -286,7 +274,7 @@ py_test(
py_test(
name = "metric_spec_test",
size = "small",
srcs = ["python/learn/tests/metric_spec_test.py"],
srcs = ["python/learn/metric_spec_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -298,7 +286,7 @@ py_test(
py_test(
name = "experiment_test",
size = "small",
srcs = ["python/learn/tests/experiment_test.py"],
srcs = ["python/learn/experiment_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -310,7 +298,7 @@ py_test(
py_test(
name = "graph_actions_test",
size = "small",
srcs = ["python/learn/tests/graph_actions_test.py"],
srcs = ["python/learn/graph_actions_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -322,7 +310,7 @@ py_test(
py_test(
name = "learn_runner_test",
size = "small",
srcs = ["python/learn/tests/learn_runner_test.py"],
srcs = ["python/learn/learn_runner_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -334,7 +322,7 @@ py_test(
py_test(
name = "monitors_test",
size = "small",
srcs = ["python/learn/tests/monitors_test.py"],
srcs = ["python/learn/monitors_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -347,7 +335,7 @@ py_test(
name = "run_config_test",
size = "small",
srcs = [
"python/learn/tests/run_config_test.py",
"python/learn/estimators/run_config_test.py",
],
srcs_version = "PY2AND3",
deps = [
@ -356,18 +344,6 @@ py_test(
],
)
py_test(
name = "basic_session_run_hooks_test",
size = "small",
srcs = ["python/learn/tests/basic_session_run_hooks_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
"//tensorflow:tensorflow_py",
"//tensorflow/python:framework_test_lib",
],
)
py_test(
name = "tensor_signature_test",
srcs = ["python/learn/estimators/tensor_signature_test.py"],
@ -430,15 +406,10 @@ py_test(
)
py_test(
name = "dnn_test",
size = "medium",
srcs = ["python/learn/estimators/dnn_test.py"],
shard_count = 4,
name = "head_test",
size = "small",
srcs = ["python/learn/estimators/head_test.py"],
srcs_version = "PY2AND3",
tags = [
"manual", # http://b/31934515
"notap",
],
deps = [
":learn",
"//tensorflow:tensorflow_py",
@ -447,9 +418,10 @@ py_test(
)
py_test(
name = "dnn_sampled_softmax_classifier_test",
size = "large",
srcs = ["python/learn/estimators/dnn_sampled_softmax_classifier_test.py"],
name = "dnn_test",
size = "medium",
srcs = ["python/learn/estimators/dnn_test.py"],
shard_count = 4,
srcs_version = "PY2AND3",
tags = [
"manual", # http://b/31934515
@ -538,7 +510,7 @@ py_test(
py_test(
name = "grid_search_test",
size = "small",
srcs = ["python/learn/tests/grid_search_test.py"],
srcs = ["python/learn/grid_search_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -550,7 +522,7 @@ py_test(
py_test(
name = "io_test",
size = "small",
srcs = ["python/learn/tests/io_test.py"],
srcs = ["python/learn/learn_io/io_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -562,7 +534,7 @@ py_test(
py_test(
name = "multioutput_test",
size = "small",
srcs = ["python/learn/tests/multioutput_test.py"],
srcs = ["python/learn/estimators/multioutput_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -574,7 +546,7 @@ py_test(
py_test(
name = "nonlinear_test",
size = "medium",
srcs = ["python/learn/tests/nonlinear_test.py"],
srcs = ["python/learn/estimators/nonlinear_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -586,7 +558,7 @@ py_test(
py_test(
name = "regression_test",
size = "small",
srcs = ["python/learn/tests/regression_test.py"],
srcs = ["python/learn/estimators/regression_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -598,7 +570,7 @@ py_test(
py_test(
name = "ops_test",
size = "small",
srcs = ["python/learn/ops/tests/ops_test.py"],
srcs = ["python/learn/ops/ops_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -610,7 +582,7 @@ py_test(
py_test(
name = "seq2seq_ops_test",
size = "small",
srcs = ["python/learn/ops/tests/seq2seq_ops_test.py"],
srcs = ["python/learn/ops/seq2seq_ops_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",
@ -687,7 +659,7 @@ py_test(
py_test(
name = "stability_test",
size = "small",
srcs = ["python/learn/tests/stability_test.py"],
srcs = ["python/learn/estimators/stability_test.py"],
srcs_version = "PY2AND3",
deps = [
":learn",

View File

@ -43,3 +43,15 @@ filegroup(
),
visibility = ["//tensorflow:__subpackages__"],
)
py_test(
name = "load_csv_test",
size = "small",
srcs = ["load_csv_test.py"],
srcs_version = "PY2AND3",
deps = [
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/learn",
"//tensorflow/python:framework_test_lib",
],
)

View File

@ -14,7 +14,6 @@
# ==============================================================================
"""Estimators."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@ -32,6 +31,8 @@ from tensorflow.contrib.learn.python.learn.estimators.estimator import Estimator
from tensorflow.contrib.learn.python.learn.estimators.estimator import infer_real_valued_columns_from_input
from tensorflow.contrib.learn.python.learn.estimators.estimator import infer_real_valued_columns_from_input_fn
from tensorflow.contrib.learn.python.learn.estimators.estimator import ModeKeys
from tensorflow.contrib.learn.python.learn.estimators.head import MetricKey
from tensorflow.contrib.learn.python.learn.estimators.head import PedictionKey
from tensorflow.contrib.learn.python.learn.estimators.linear import LinearClassifier
from tensorflow.contrib.learn.python.learn.estimators.linear import LinearRegressor
from tensorflow.contrib.learn.python.learn.estimators.logistic_regressor import LogisticRegressor

View File

@ -43,7 +43,7 @@ class BaseTest(tf.test.TestCase):
feature_columns = learn.infer_real_valued_columns_from_input(x)
regressor = learn.LinearRegressor(feature_columns=feature_columns)
regressor.fit(x, y, max_steps=100)
score = mean_squared_error(y, regressor.predict(x))
score = mean_squared_error(y, np.array(list(regressor.predict(x))))
self.assertLess(score, 1.0, "Failed with score = {0}".format(score))
def testIris(self):
@ -52,7 +52,7 @@ class BaseTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
n_classes=3)
classifier.fit(iris.data, [x for x in iris.target], max_steps=100)
score = accuracy_score(iris.target, classifier.predict(iris.data))
score = accuracy_score(iris.target, list(classifier.predict(iris.data)))
self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def testIrisAllVariables(self):
@ -82,7 +82,7 @@ class BaseTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
n_classes=3, model_dir=output_dir)
classifier.fit(iris.data, iris.target, max_steps=100)
score = accuracy_score(iris.target, classifier.predict(iris.data))
score = accuracy_score(iris.target, list(classifier.predict(iris.data)))
self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
# TODO(ipolosukhin): Check that summaries are correctly written.
@ -92,9 +92,9 @@ class BaseTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
n_classes=3)
classifier.fit(iris.data, iris.target, steps=100)
score1 = accuracy_score(iris.target, classifier.predict(iris.data))
score1 = accuracy_score(iris.target, list(classifier.predict(iris.data)))
classifier.fit(iris.data, iris.target, steps=500)
score2 = accuracy_score(iris.target, classifier.predict(iris.data))
score2 = accuracy_score(iris.target, list(classifier.predict(iris.data)))
self.assertGreater(
score2, score1,
"Failed with score2 {0} <= score1 {1}".format(score2, score1))
@ -120,9 +120,10 @@ class BaseTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
n_classes=3)
classifier.fit(iris_data(), iris_target(), max_steps=500)
score1 = accuracy_score(iris.target, classifier.predict(iris.data))
score1 = accuracy_score(iris.target,
list(classifier.predict(iris.data)))
score2 = accuracy_score(iris.target,
classifier.predict(iris_predict_data()))
list(classifier.predict(iris_predict_data())))
self.assertGreater(score1, 0.5, "Failed with score = {0}".format(score1))
self.assertEqual(score2, score1, "Scores from {0} iterator doesn't "
"match score {1} from full "
@ -137,7 +138,7 @@ class BaseTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
n_classes=3)
classifier.fit(iris.data, iris.target, max_steps=250)
score = log_loss(iris.target, classifier.predict_proba(iris.data))
score = log_loss(iris.target, list(classifier.predict_proba(iris.data)))
self.assertLess(score, 0.8, "Failed with score = {0}".format(score))
def testBoston(self):
@ -146,7 +147,8 @@ class BaseTest(tf.test.TestCase):
regressor = learn.LinearRegressor(
feature_columns=learn.infer_real_valued_columns_from_input(boston.data))
regressor.fit(boston.data, boston.target, max_steps=500)
score = mean_squared_error(boston.target, regressor.predict(boston.data))
score = mean_squared_error(
boston.target, np.array(list(regressor.predict(boston.data))))
self.assertLess(score, 150, "Failed with score = {0}".format(score))
def testUnfitted(self):

View File

@ -126,7 +126,7 @@ class Classifier(estimator.Estimator):
@deprecated_arg_values(
estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
as_iterable=False)
def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False):
def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True):
"""Returns predicted classes for given features.
Args:
@ -160,7 +160,7 @@ class Classifier(estimator.Estimator):
estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
as_iterable=False)
def predict_proba(
self, x=None, input_fn=None, batch_size=None, as_iterable=False):
self, x=None, input_fn=None, batch_size=None, as_iterable=True):
"""Returns predicted probabilty distributions for given features.
Args:

View File

@ -32,9 +32,8 @@ from tensorflow.contrib.session_bundle import manifest_pb2
def iris_input_fn(num_epochs=None):
iris = tf.contrib.learn.datasets.load_iris()
features = tf.reshape(tf.constant(iris.data), [-1, 4])
if num_epochs:
features = tf.train.limit_epochs(features, num_epochs=num_epochs)
features = tf.train.limit_epochs(
tf.reshape(tf.constant(iris.data), [-1, 4]), num_epochs=num_epochs)
target = tf.reshape(tf.constant(iris.target), [-1])
return features, target
@ -71,42 +70,22 @@ class ClassifierTest(tf.test.TestCase):
params={'learning_rate': 0.01})
self._runIrisAll(est)
def testIrisPredictAsIterable(self):
iris = tf.contrib.learn.datasets.load_iris()
est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3)
est.fit(iris.data, iris.target, steps=100)
scores = est.evaluate(x=iris.data, y=iris.target, name='eval')
predictions = list(est.predict(x=iris.data, as_iterable=True))
predictions_proba = list(est.predict_proba(x=iris.data, as_iterable=True))
self.assertEqual(len(predictions), iris.target.shape[0])
self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1))
other_score = _sklearn.accuracy_score(iris.target, predictions)
self.assertAllClose(other_score, scores['accuracy'])
def testIrisInputFn(self):
iris = tf.contrib.learn.datasets.load_iris()
est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3)
est.fit(input_fn=iris_input_fn, steps=100)
est.evaluate(input_fn=iris_input_fn, steps=1, name='eval')
predictions = est.predict(input_fn=iris_input_fn)
self.assertEqual(predictions.shape[0], iris.target.shape[0])
def testIrisPredictInputFnAsIterable(self):
iris = tf.contrib.learn.datasets.load_iris()
est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3)
est.fit(input_fn=iris_input_fn, steps=100)
est.evaluate(input_fn=iris_input_fn, steps=1, name='eval')
predict_input_fn = functools.partial(iris_input_fn, num_epochs=1)
predictions = list(est.predict(input_fn=predict_input_fn, as_iterable=True))
predictions = list(est.predict(input_fn=predict_input_fn))
self.assertEqual(len(predictions), iris.target.shape[0])
def _runIrisAll(self, est):
iris = tf.contrib.learn.datasets.load_iris()
est.fit(iris.data, iris.target, steps=100)
scores = est.evaluate(x=iris.data, y=iris.target, name='eval')
predictions = est.predict(x=iris.data)
predictions_proba = est.predict_proba(x=iris.data)
self.assertEqual(predictions.shape[0], iris.target.shape[0])
predictions = list(est.predict(x=iris.data))
predictions_proba = list(est.predict_proba(x=iris.data))
self.assertEqual(len(predictions), iris.target.shape[0])
self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1))
other_score = _sklearn.accuracy_score(iris.target, predictions)
self.assertAllClose(other_score, scores['accuracy'])

View File

@ -23,11 +23,11 @@ import tempfile
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import metrics as metrics_lib
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.contrib.learn.python.learn.estimators import composable_model
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.python.framework import ops
from tensorflow.python.ops import state_ops
@ -42,10 +42,10 @@ def _iris_input_fn():
class _BaseEstimatorForTest(estimator.BaseEstimator):
def __init__(self,
target_column,
head,
feature_columns):
super(_BaseEstimatorForTest, self).__init__(model_dir=tempfile.mkdtemp())
self._target_column = target_column
self._head = head
self._feature_columns = feature_columns
def _get_train_ops(self, features, targets):
@ -54,18 +54,22 @@ class _BaseEstimatorForTest(estimator.BaseEstimator):
logits = self._model.build_model(
features, self._feature_columns, is_training=True)
loss = self._target_column.loss(logits, targets, features)
train_step = self._model.get_train_step(loss)
model_fn_ops = self._head.head_ops(features, targets,
tf.contrib.learn.ModeKeys.TRAIN,
_noop_training_fn, logits=logits)
train_step = self._model.get_train_step(model_fn_ops.loss)
with ops.control_dependencies(train_step):
with ops.get_default_graph().colocate_with(global_step):
return state_ops.assign_add(global_step, 1).op, loss
return state_ops.assign_add(global_step, 1).op, model_fn_ops.loss
def _get_eval_ops(self, features, targets, metrics=None):
logits = self._model.build_model(
features, self._feature_columns, is_training=False)
loss = self._target_column.loss(logits, targets, features)
return {'loss': metrics_lib.streaming_mean(loss)}
model_fn_ops = self._head.head_ops(features, targets,
tf.contrib.learn.ModeKeys.TRAIN,
_noop_training_fn, logits=logits)
return {'loss': metrics_lib.streaming_mean(model_fn_ops.loss)}
def _get_predict_ops(self, features):
raise NotImplementedError
@ -74,32 +78,32 @@ class _BaseEstimatorForTest(estimator.BaseEstimator):
class LinearEstimator(_BaseEstimatorForTest):
def __init__(self,
target_column,
head,
feature_columns):
super(LinearEstimator, self).__init__(target_column, feature_columns)
super(LinearEstimator, self).__init__(head, feature_columns)
self._model = composable_model.LinearComposableModel(
num_label_columns=target_column.num_label_columns)
num_label_columns=head.logits_dimension)
class JointLinearEstimator(_BaseEstimatorForTest):
def __init__(self,
target_column,
head,
feature_columns):
super(JointLinearEstimator, self).__init__(target_column, feature_columns)
super(JointLinearEstimator, self).__init__(head, feature_columns)
self._model = composable_model.LinearComposableModel(
num_label_columns=target_column.num_label_columns, _joint_weights=True)
num_label_columns=head.logits_dimension, _joint_weights=True)
class DNNEstimator(_BaseEstimatorForTest):
def __init__(self,
target_column,
head,
feature_columns,
hidden_units):
super(DNNEstimator, self).__init__(target_column, feature_columns)
super(DNNEstimator, self).__init__(head, feature_columns)
self._model = composable_model.DNNComposableModel(
num_label_columns=target_column.num_label_columns,
num_label_columns=head.logits_dimension,
hidden_units=hidden_units)
@ -119,8 +123,8 @@ class ComposableModelTest(tf.test.TestCase):
language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
age = tf.contrib.layers.real_valued_column('age')
target_column = layers.multi_class_target(n_classes=2)
classifier = LinearEstimator(target_column,
head = head_lib._multi_class_head(n_classes=2)
classifier = LinearEstimator(head,
feature_columns=[age, language])
classifier.fit(input_fn=input_fn, steps=1000)
@ -144,8 +148,8 @@ class ComposableModelTest(tf.test.TestCase):
language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100)
age = tf.contrib.layers.sparse_column_with_hash_bucket('age', 2)
target_column = layers.multi_class_target(n_classes=2)
classifier = JointLinearEstimator(target_column,
head = head_lib._multi_class_head(n_classes=2)
classifier = JointLinearEstimator(head,
feature_columns=[age, language])
classifier.fit(input_fn=input_fn, steps=1000)
@ -160,8 +164,8 @@ class ComposableModelTest(tf.test.TestCase):
cont_features = [
tf.contrib.layers.real_valued_column('feature', dimension=4)]
target_column = layers.multi_class_target(n_classes=3)
classifier = DNNEstimator(target_column,
head = head_lib._multi_class_head(n_classes=3)
classifier = DNNEstimator(head,
feature_columns=cont_features,
hidden_units=[3, 3])
@ -169,5 +173,9 @@ class ComposableModelTest(tf.test.TestCase):
classifier.evaluate(input_fn=_iris_input_fn, steps=100)
def _noop_training_fn(unused_loss):
return tf.no_op()
if __name__ == '__main__':
tf.test.main()

View File

@ -267,16 +267,24 @@ def _dnn_classifier_model_fn(features, targets, mode, params):
if mode == estimator.ModeKeys.TRAIN:
targets = _reshape_targets(targets)
loss = loss_fn(logits, targets,
weight=_get_weight_tensor(features, weight_column_name))
weight = _get_weight_tensor(features, weight_column_name)
training_loss = loss_fn(logits, targets, weight=weight)
loss = _rescale_eval_loss(training_loss, weight)
train_ops = [optimizers.optimize_loss(
loss=loss, global_step=contrib_variables.get_global_step(),
learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer),
clip_gradients=gradient_clip_norm, name=parent_scope)]
loss=training_loss,
global_step=contrib_variables.get_global_step(),
learning_rate=_LEARNING_RATE,
optimizer=_get_optimizer(optimizer),
clip_gradients=gradient_clip_norm,
name=parent_scope,
# Empty summaries to prevent optimizers from logging the training_loss.
summaries=[])]
if enable_centered_bias:
train_ops.append(_centered_bias_step(targets, loss_fn, num_label_columns))
logging_ops.scalar_summary("loss", loss)
return None, loss, control_flow_ops.group(*train_ops)
elif mode == estimator.ModeKeys.EVAL:
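`_rescale_eval_loss` is internal to this file; the apparent intent of the
change in the TRAIN branch above is that the optimizer minimizes the raw
weighted training loss while the reported `loss` summary is normalized by the
weights, keeping differently-weighted batches comparable. A sketch under that
assumption (not the actual helper):
```python
def rescale_loss_sketch(per_example_losses, weights):
  # Reported loss = sum(w_i * l_i) / sum(w_i), so its magnitude does not
  # scale with the weights; the optimizer still sees the unnormalized loss.
  total_weight = sum(weights)
  weighted_sum = sum(w * l for w, l in zip(weights, per_example_losses))
  return weighted_sum / total_weight if total_weight else weighted_sum
```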

View File

@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import six
from tensorflow.contrib import layers
from tensorflow.contrib.framework import deprecated
@ -28,15 +29,12 @@ from tensorflow.contrib.framework.python.ops import variables as contrib_variabl
from tensorflow.contrib.layers.python.layers import feature_column_ops
from tensorflow.contrib.learn.python.learn.estimators import composable_model
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import logging_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import training
def _changing_default_center_bias():
@ -67,7 +65,7 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
"""
def __init__(self, # _joint_linear_weights pylint: disable=invalid-name
target_column,
head,
model_dir=None,
linear_feature_columns=None,
linear_optimizer=None,
@ -78,13 +76,13 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
dnn_activation_fn=nn.relu,
dnn_dropout=None,
gradient_clip_norm=None,
enable_centered_bias=True,
config=None,
feature_engineering_fn=None):
feature_engineering_fn=None,
default_prediction_key=None):
"""Initializes a _DNNLinearCombinedBaseEstimator instance.
Args:
target_column: A _TargetColumn object.
head: A _Head object.
model_dir: Directory to save model parameters, graph and etc. This can
also be used to load checkpoints from the directory into a estimator
to continue training a previously saved model.
@ -111,14 +109,12 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
gradient_clip_norm: A float > 0. If provided, gradients are clipped
to their global norm with this clipping ratio. See
tf.clip_by_global_norm for more details.
enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
config: RunConfig object to configure the runtime settings.
feature_engineering_fn: Feature engineering function. Takes features and
targets which are the output of `input_fn` and
returns features and targets which will be fed
into the model.
default_prediction_key: Default prediction key to use with metrics.
Raises:
ValueError: If both linear_feature_columns and dnn_features_columns are
@ -130,14 +126,14 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
num_ps_replicas = config.num_ps_replicas if config else 0
self._linear_model = composable_model.LinearComposableModel(
num_label_columns=target_column.num_label_columns,
num_label_columns=head.logits_dimension,
optimizer=linear_optimizer,
_joint_weights=_joint_linear_weights,
gradient_clip_norm=gradient_clip_norm,
num_ps_replicas=num_ps_replicas)
self._dnn_model = composable_model.DNNComposableModel(
num_label_columns=target_column.num_label_columns,
num_label_columns=head.logits_dimension,
hidden_units=dnn_hidden_units,
optimizer=dnn_optimizer,
activation_fn=dnn_activation_fn,
@ -149,9 +145,8 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
self._linear_optimizer = linear_optimizer
self._dnn_feature_columns = dnn_feature_columns
self._dnn_hidden_units = dnn_hidden_units
self._centered_bias_weight_collection = "centered_bias"
self._enable_centered_bias = enable_centered_bias
self._target_column = target_column
self._head = head
self._default_prediction_key = default_prediction_key
self._feature_engineering_fn = (
feature_engineering_fn or
(lambda features, targets: (features, targets)))
@ -194,9 +189,12 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
return (self._dnn_model.get_bias(model_dir=self._model_dir) +
[self.get_variable_value("centered_bias_weight")])
def _get_target_column(self):
"""Returns the target column of this Estimator."""
return self._target_column
# TODO(zakaria): Remove this function once export.export_estimator is
# obsolete.
def _create_signature_fn(self):
"""Returns a function to create export signature of this Estimator."""
# pylint: disable=protected-access
return self._head._create_signature_fn()
def _get_feature_dict(self, features):
if isinstance(features, dict):
@ -205,45 +203,60 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
def _get_train_ops(self, features, targets):
"""See base class."""
global_step = contrib_variables.get_global_step()
assert global_step
features = self._get_feature_dict(features)
features, targets = self._feature_engineering_fn(features, targets)
logits = self._logits(features, is_training=True)
if self._enable_centered_bias:
centered_bias_step = [self._centered_bias_step(targets, features)]
else:
centered_bias_step = []
with ops.control_dependencies(centered_bias_step):
training_loss = self._target_column.training_loss(logits, targets,
features)
weighted_average_loss = self._target_column.loss(logits, targets,
features)
logging_ops.scalar_summary("loss", weighted_average_loss)
def _make_training_op(training_loss):
global_step = contrib_variables.get_global_step()
assert global_step
linear_train_step = self._linear_model.get_train_step(training_loss)
dnn_train_step = (self._dnn_model.get_train_step(training_loss) if
self._dnn_model else [])
linear_train_step = self._linear_model.get_train_step(training_loss)
dnn_train_step = (self._dnn_model.get_train_step(training_loss) if
self._dnn_model else [])
with ops.control_dependencies(linear_train_step + dnn_train_step):
with ops.get_default_graph().colocate_with(global_step):
return state_ops.assign_add(global_step, 1).op
with ops.control_dependencies(linear_train_step + dnn_train_step):
with ops.get_default_graph().colocate_with(global_step):
return state_ops.assign_add(global_step, 1).op, weighted_average_loss
model_fn_ops = self._head.head_ops(features, targets,
estimator.ModeKeys.TRAIN,
_make_training_op,
logits=logits)
return model_fn_ops.training_op, model_fn_ops.loss
def _get_eval_ops(self, features, targets, metrics=None):
"""See base class."""
features = self._get_feature_dict(features)
features, targets = self._feature_engineering_fn(features, targets)
logits = self._logits(features)
return self._target_column.get_eval_ops(features, logits, targets, metrics)
model_fn_ops = self._head.head_ops(features, targets,
estimator.ModeKeys.EVAL, None,
logits=logits)
all_metrics = model_fn_ops.default_metrics
if metrics:
for name, metric in six.iteritems(metrics):
if not isinstance(name, tuple):
# TODO(zakaria): remove once deprecation is finished (b/31229024)
all_metrics[(name, self._default_prediction_key)] = metric
else:
all_metrics[name] = metric
# TODO(zakaria): Remove this once we refactor this class to delegate
# to estimator.
# pylint: disable=protected-access
result = estimator._make_metrics_ops(all_metrics, features, targets,
model_fn_ops.predictions)
return result
def _get_predict_ops(self, features):
"""See base class."""
features = self._get_feature_dict(features)
features, _ = self._feature_engineering_fn(features, None)
logits = self._logits(features)
return self._target_column.logits_to_predictions(logits, proba=True)
model_fn_ops = self._head.head_ops(features, None, estimator.ModeKeys.INFER,
None, logits=logits)
return model_fn_ops.predictions
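The loop in `_get_eval_ops` above normalizes user-supplied metric keys: a bare string is paired with the estimator's `_default_prediction_key`, while an explicit `(name, prediction_key)` tuple passes through. A self-contained sketch of that keying rule (dict values are placeholders):

def normalize_metric_keys(metrics, default_prediction_key):
    # Mirror of the keying loop above.
    all_metrics = {}
    for name, metric in metrics.items():
        if not isinstance(name, tuple):
            all_metrics[(name, default_prediction_key)] = metric
        else:
            all_metrics[name] = metric
    return all_metrics

print(normalize_metric_keys(
    {'my_auc': 'auc_fn', ('my_acc', 'classes'): 'acc_fn'}, 'classes'))
# {('my_auc', 'classes'): 'auc_fn', ('my_acc', 'classes'): 'acc_fn'}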
@deprecated(
"2016-09-23",
@ -278,32 +291,6 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
return self._linear_model.build_model(
features, self._linear_feature_columns, is_training)
def _centered_bias(self):
centered_bias = variables.Variable(
array_ops.zeros([self._target_column.num_label_columns]),
collections=[self._centered_bias_weight_collection,
ops.GraphKeys.VARIABLES],
name="centered_bias_weight")
logging_ops.scalar_summary(
["centered_bias_%d" % cb for cb in range(
self._target_column.num_label_columns)],
array_ops.reshape(centered_bias, [-1]))
return centered_bias
def _centered_bias_step(self, targets, features):
centered_bias = ops.get_collection(self._centered_bias_weight_collection)
batch_size = array_ops.shape(targets)[0]
logits = array_ops.reshape(
array_ops.tile(centered_bias[0], [batch_size]),
[batch_size, self._target_column.num_label_columns])
with ops.name_scope(None, "centered_bias", (targets, features)):
training_loss = self._target_column.training_loss(
logits, targets, features)
# Learn centered bias by an optimizer. 0.1 is a conservative lr for a
# single variable.
return training.AdagradOptimizer(0.1).minimize(
training_loss, var_list=centered_bias)
def _logits(self, features, is_training=False):
linear_feature_columns = self._get_linear_feature_columns()
dnn_feature_columns = self._get_dnn_feature_columns()
@ -319,10 +306,7 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
else:
logits = self._linear_logits(features, is_training)
if self._enable_centered_bias:
return nn.bias_add(logits, self._centered_bias())
else:
return logits
return logits
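The deleted `_centered_bias*` helpers now live behind the head's `enable_centered_bias` flag. Their idea, condensed into a toy NumPy sketch (not the head's actual implementation, which trains the bias with Adagrad against the real training loss): a lone bias vector is nudged toward the marginal label distribution so the rest of the model only fits the residual.

import numpy as np

def centered_bias_step(bias, targets, lr=0.1):   # 0.1 as in the old code
    grad = bias - targets.mean(axis=0)           # gradient of 0.5 * MSE
    return bias - lr * grad

bias = np.zeros(2)
targets = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
for _ in range(100):
    bias = centered_bias_step(bias, targets)
print(bias)   # approaches the marginal rates [0.667, 0.333]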
class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
@ -448,10 +432,11 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
if enable_centered_bias is None:
enable_centered_bias = True
_changing_default_center_bias()
target_column = layers.multi_class_target(
# pylint: disable=protected-access
head = head_lib._multi_class_head(
n_classes=n_classes,
weight_column_name=weight_column_name)
weight_column_name=weight_column_name,
enable_centered_bias=enable_centered_bias)
super(DNNLinearCombinedClassifier, self).__init__(
model_dir=model_dir,
linear_feature_columns=linear_feature_columns,
@ -463,15 +448,15 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
dnn_activation_fn=dnn_activation_fn,
dnn_dropout=dnn_dropout,
gradient_clip_norm=gradient_clip_norm,
enable_centered_bias=enable_centered_bias,
target_column=target_column,
head=head,
config=config,
feature_engineering_fn=feature_engineering_fn)
feature_engineering_fn=feature_engineering_fn,
default_prediction_key=head_lib.PedictionKey.CLASSES)
@deprecated_arg_values(
estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
as_iterable=False)
def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False):
def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True):
"""Returns predicted classes for given features.
Args:
@ -498,7 +483,7 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
as_iterable=False)
def predict_proba(
self, x=None, input_fn=None, batch_size=None, as_iterable=False):
self, x=None, input_fn=None, batch_size=None, as_iterable=True):
"""Returns prediction probabilities for given features.
Args:
@ -517,6 +502,11 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator):
return super(DNNLinearCombinedClassifier, self).predict(
x=x, input_fn=input_fn, batch_size=batch_size, as_iterable=as_iterable)
def _get_predict_ops(self, features):
"""See base class."""
return super(DNNLinearCombinedClassifier, self)._get_predict_ops(features)[
head_lib.PedictionKey.PROBABILITIES]
class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator):
"""A regressor for TensorFlow Linear and DNN joined training models.
@ -642,9 +632,11 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator):
if enable_centered_bias is None:
enable_centered_bias = True
_changing_default_center_bias()
target_column = layers.regression_target(
# pylint: disable=protected-access
head = head_lib._regression_head(
weight_column_name=weight_column_name,
target_dimension=target_dimension)
target_dimension=target_dimension,
enable_centered_bias=enable_centered_bias)
super(DNNLinearCombinedRegressor, self).__init__(
model_dir=model_dir,
linear_feature_columns=linear_feature_columns,
@ -656,7 +648,14 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator):
dnn_activation_fn=dnn_activation_fn,
dnn_dropout=dnn_dropout,
gradient_clip_norm=gradient_clip_norm,
enable_centered_bias=enable_centered_bias,
target_column=target_column,
head=head,
config=config,
feature_engineering_fn=feature_engineering_fn)
feature_engineering_fn=feature_engineering_fn,
default_prediction_key=head_lib.PedictionKey.SCORES)
def _get_predict_ops(self, features):
"""See base class."""
return super(DNNLinearCombinedRegressor, self)._get_predict_ops(features)[
head_lib.PedictionKey.SCORES]
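Both `_get_predict_ops` overrides above share one shape: the head's INFER-mode `head_ops` returns a dict of named prediction tensors, and each estimator surfaces only the key its `predict` contract promises. Illustrative values only:

predictions = {                        # what the head would emit
    'classes': [0, 1],
    'probabilities': [[0.7, 0.3], [0.2, 0.8]],
    'scores': [0.3, 0.8],
}
print(predictions['probabilities'])    # DNNLinearCombinedClassifier's key
print(predictions['scores'])           # DNNLinearCombinedRegressor's key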

View File

@ -254,7 +254,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')],
dnn_hidden_units=[3, 3],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
classifier.fit(input_fn=_input_fn_train, steps=100)
scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
# Weighted cross entropy = (-7*log(0.25)-3*log(0.75))/10 = 1.06
@ -289,7 +288,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')],
dnn_hidden_units=[3, 3],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
classifier.fit(input_fn=_input_fn_train, steps=100)
scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
# The model should learn (y = x) because of the weights, so the accuracy
@ -371,7 +369,7 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
def _input_fn_train():
# Create 4 rows, one of them (y = x), three of them (y=Not(x))
target = tf.constant([[1], [0], [0], [0]])
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)}
return features, target
def _input_fn_predict():
@ -387,30 +385,26 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
classifier.fit(input_fn=_input_fn_train, steps=100)
probs = classifier.predict_proba(input_fn=_input_fn_predict)
probs = list(classifier.predict_proba(input_fn=_input_fn_predict))
self.assertAllClose([[0.75, 0.25]] * 4, probs, 0.05)
classes = classifier.predict(input_fn=_input_fn_predict)
self.assertListEqual([0] * 4, list(classes))
probs = classifier.predict_proba(
input_fn=_input_fn_predict, as_iterable=True)
self.assertAllClose([[0.75, 0.25]] * 4, list(probs), 0.05)
classes = classifier.predict(
input_fn=_input_fn_predict, as_iterable=True)
self.assertListEqual([0] * 4, list(classes))
classes = list(classifier.predict(input_fn=_input_fn_predict))
self.assertListEqual([0] * 4, classes)
def testCustomMetrics(self):
"""Tests custom evaluation metrics."""
def _input_fn_train():
def _input_fn(num_epochs=None):
# Create 4 rows, one of them (y = x), three of them (y=Not(x))
target = tf.constant([[1], [0], [0], [0]])
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
features = {
'x': tf.train.limit_epochs(
tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)}
return features, target
def _my_metric_op(predictions, targets):
# For the case of binary classification, the 2nd column of "predictions"
# denotes the model predictions.
targets = tf.to_float(targets)
predictions = tf.slice(predictions, [0, 1], [-1, 1])
return tf.reduce_sum(tf.mul(predictions, targets))
@ -419,9 +413,9 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')],
dnn_hidden_units=[3, 3])
classifier.fit(input_fn=_input_fn_train, steps=100)
classifier.fit(input_fn=_input_fn, steps=100)
scores = classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={
'my_accuracy': tf.contrib.metrics.streaming_accuracy,
@ -431,22 +425,24 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
self.assertTrue(
set(['loss', 'my_accuracy', 'my_precision', 'my_metric'
]).issubset(set(scores.keys())))
predictions = classifier.predict(input_fn=_input_fn_train)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(
list(classifier.predict(input_fn=predict_input_fn)))
self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions),
scores['my_accuracy'])
# Test the case where the 2nd element of the key is neither "classes" nor
# "probabilities".
with self.assertRaises(ValueError):
with self.assertRaises(KeyError):
classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={('bad_name', 'bad_type'): tf.contrib.metrics.streaming_auc})
# Test the case where the tuple of the key doesn't have 2 elements.
with self.assertRaises(ValueError):
classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={
('bad_length_name', 'classes', 'bad_length'):
@ -536,7 +532,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
self.assertNotIn('dnn/logits/weights', classifier.get_variable_names())
self.assertEquals(1, len(classifier.linear_bias_))
self.assertEquals(2, len(classifier.linear_weights_))
print(classifier.linear_weights_)
self.assertEquals(1, len(classifier.linear_weights_['linear/age/weight']))
self.assertEquals(
100, len(classifier.linear_weights_['linear/language/weights']))
@ -810,10 +805,11 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase):
def testCustomMetrics(self):
"""Tests custom evaluation metrics."""
def _input_fn_train():
def _input_fn(num_epochs=None):
# Create 4 rows, one of them (y = x), three of them (y=Not(x))
target = tf.constant([[1.], [0.], [0.], [0.]])
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
features = {'x': tf.train.limit_epochs(
tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)}
return features, target
def _my_metric_op(predictions, targets):
@ -825,9 +821,9 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase):
dnn_hidden_units=[3, 3],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
regressor.fit(input_fn=_input_fn_train, steps=100)
regressor.fit(input_fn=_input_fn, steps=100)
scores = regressor.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=1,
metrics={
'my_error': tf.contrib.metrics.streaming_mean_squared_error,
@ -836,25 +832,27 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase):
self.assertIn('loss', set(scores.keys()))
self.assertIn('my_error', set(scores.keys()))
self.assertIn('my_metric', set(scores.keys()))
predictions = regressor.predict(input_fn=_input_fn_train)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(regressor.predict(input_fn=predict_input_fn)))
self.assertAlmostEqual(
_sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
scores['my_error'])
# Tests that when the key is a tuple, an error is raised.
with self.assertRaises(TypeError):
with self.assertRaises(KeyError):
regressor.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=1,
metrics={('my_error', 'predictions'
): tf.contrib.metrics.streaming_mean_squared_error})
def testTrainSaveLoad(self):
"""Tests regression with restarting training / evaluate."""
def _input_fn():
def _input_fn(num_epochs=None):
# Create 4 rows of (y = x)
target = tf.constant([[100.], [3.], [2.], [2.]])
features = {'x': tf.constant([[100.], [3.], [2.], [2.]])}
features = {'x': tf.train.limit_epochs(
tf.constant([[100.], [3.], [2.], [2.]]), num_epochs=num_epochs)}
return features, target
model_dir = tempfile.mkdtemp()
@ -866,13 +864,14 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase):
model_dir=model_dir,
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
classifier = new_estimator()
classifier.fit(input_fn=_input_fn, steps=100)
predictions = classifier.predict(input_fn=_input_fn)
predictions = list(classifier.predict(input_fn=predict_input_fn))
del classifier
classifier = new_estimator()
predictions2 = classifier.predict(input_fn=_input_fn)
predictions2 = list(classifier.predict(input_fn=predict_input_fn))
self.assertAllClose(predictions, predictions2)
def testTrainWithPartitionedVariables(self):
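A recurring change in these tests: `predict` now defaults to returning a generator, which terminates only when the input pipeline is exhausted, hence the `functools.partial(_input_fn, num_epochs=1)` wrappers. A self-contained illustration of why the epoch bound matters:

import itertools

def predictions_from(batches):
    for batch in batches:          # terminates only if the input does
        yield [0] * len(batch)     # stand-in for per-batch model output

bounded = predictions_from([[1, 2], [3, 4]])             # like num_epochs=1
print(list(bounded))                                      # [[0, 0], [0, 0]]

unbounded = predictions_from(itertools.cycle([[1, 2]]))   # num_epochs=None
# list(unbounded) would never return; a fixed slice is the only safe read:
print(list(itertools.islice(unbounded, 2)))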

View File

@ -1,568 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
"""Deep Neural Network estimator for large multi-class multi-label problems.
Training is sped up using candidate sampling; evaluation and inference
use full softmax.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tempfile
from tensorflow.contrib import framework as contrib_framework
from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.layers.python.layers import optimizers
from tensorflow.contrib.learn.python.learn import evaluable
from tensorflow.contrib.learn.python.learn import metric_spec
from tensorflow.contrib.learn.python.learn import trainable
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.metrics.python.ops import metric_ops
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import standard_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.training import training as train
_CLASSES = "classes"
_TOP_K = "top_k"
_PROBABILITIES = "probabilities"
_DEFAULT_LEARNING_RATE = 0.01
def _as_iterable(preds, output):
for pred in preds:
yield pred[output]
def _get_optimizer(optimizer):
if callable(optimizer):
return optimizer()
else:
return optimizer
def _get_default_optimizer():
"""Default optimizer for DNN models."""
return train.AdagradOptimizer(_DEFAULT_LEARNING_RATE)
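`_get_optimizer` above accepts either an optimizer instance or a zero-argument factory; the factory form defers construction until inside the graph, which is what lets `testCustomOptimizerByFunction` later in this diff build its learning-rate schedule from the global step. A minimal sketch:

def get_optimizer(optimizer):
    # Mirror of _get_optimizer: call factories, pass instances through.
    return optimizer() if callable(optimizer) else optimizer

print(get_optimizer('adagrad(lr=0.01)'))            # instance, used as-is
print(get_optimizer(lambda: 'adagrad(lr=0.01)'))    # factory, called lazily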
def _get_feature_dict(features):
if isinstance(features, dict):
return features
return {"": features}
def _dnn_sampled_softmax_classifier_model_fn(features, targets, mode, params):
"""model_fn that uses candidate sampling.
Args:
features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
targets: A single Tensor of shape [batch_size, n_labels] containing
the target indices.
mode: Represents if this is training, evaluation or prediction. See `ModeKeys`.
params: A dict of hyperparameters that are listed below.
hidden_units- List of hidden units per layer. All layers are fully
connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
has 32.
feature_columns- An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived
from `FeatureColumn`.
n_classes- number of target classes. It must be greater than 2.
n_samples- number of sampled target classes. Needs to be tuned; a good
starting point could be 2% of n_classes.
n_labels- number of labels in each example.
top_k- The number of classes to predict.
optimizer- An instance of `tf.Optimizer` used to train the model. If
`None`, will use an Adagrad optimizer.
dropout- When not `None`, the probability we will drop out a given
coordinate.
gradient_clip_norm- A float > 0. If provided, gradients are
clipped to their global norm with this clipping ratio. See
tf.clip_by_global_norm for more details.
num_ps_replicas- The number of parameter server replicas.
Returns:
predictions: A single Tensor or a dict of Tensors.
loss: A scalar containing the loss of the step.
train_op: The op for training.
"""
hidden_units = params["hidden_units"]
feature_columns = params["feature_columns"]
n_classes = params["n_classes"]
n_samples = params["n_samples"]
n_labels = params["n_labels"]
top_k = params["top_k"]
optimizer = params["optimizer"]
dropout = params["dropout"]
gradient_clip_norm = params["gradient_clip_norm"]
num_ps_replicas = params["num_ps_replicas"]
parent_scope = "dnn_ss"
features = _get_feature_dict(features)
targets = _reshape_targets(targets)
# Setup the input layer partitioner.
input_layer_partitioner = (
partitioned_variables.min_max_variable_partitioner(
max_partitions=num_ps_replicas,
min_slice_size=64 << 20))
# Create the input layer.
with variable_scope.variable_scope(
parent_scope + "/input_from_feature_columns",
features.values(),
partitioner=input_layer_partitioner) as scope:
net = layers.input_from_feature_columns(
features,
feature_columns,
weight_collections=[parent_scope],
scope=scope)
# Setup the hidden layer partitioner.
hidden_layer_partitioner = (
partitioned_variables.min_max_variable_partitioner(
max_partitions=num_ps_replicas))
final_hidden_layer_dim = None
# Create hidden layers using fully_connected.
for layer_id, num_hidden_units in enumerate(hidden_units):
with variable_scope.variable_scope(
parent_scope + "/hiddenlayer_%d" % layer_id, [net],
partitioner=hidden_layer_partitioner) as scope:
net = layers.fully_connected(net,
num_hidden_units,
variables_collections=[parent_scope],
scope=scope)
final_hidden_layer_dim = num_hidden_units
# Add dropout if it is enabled.
if dropout is not None and mode == estimator.ModeKeys.TRAIN:
net = layers.dropout(net, keep_prob=(1.0 - dropout))
# Create the weights and biases for the logit layer.
with variable_scope.variable_scope(
parent_scope + "/logits", [net],
partitioner=hidden_layer_partitioner) as scope:
dtype = net.dtype.base_dtype
weights_shape = [n_classes, final_hidden_layer_dim]
weights = variables.model_variable(
"weights",
shape=weights_shape,
dtype=dtype,
initializer=initializers.xavier_initializer(),
trainable=True,
collections=[parent_scope])
biases = variables.model_variable(
"biases",
shape=[n_classes,],
dtype=dtype,
initializer=init_ops.zeros_initializer,
trainable=True,
collections=[parent_scope])
if mode == estimator.ModeKeys.TRAIN:
# Call the candidate sampling APIs and calculate the loss.
sampled_values = nn.learned_unigram_candidate_sampler(
true_classes=math_ops.to_int64(targets),
num_true=n_labels,
num_sampled=n_samples,
unique=True,
range_max=n_classes)
sampled_softmax_loss = nn.sampled_softmax_loss(
weights=weights,
biases=biases,
inputs=net,
labels=math_ops.to_int64(targets),
num_sampled=n_samples,
num_classes=n_classes,
num_true=n_labels,
sampled_values=sampled_values)
loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")
train_op = optimizers.optimize_loss(
loss=loss, global_step=contrib_framework.get_global_step(),
learning_rate=_DEFAULT_LEARNING_RATE,
optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
name=parent_scope)
return None, loss, train_op
elif mode == estimator.ModeKeys.EVAL:
logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
biases)
predictions = {}
predictions[_PROBABILITIES] = nn.softmax(logits)
predictions[_CLASSES] = math_ops.argmax(logits, 1)
_, predictions[_TOP_K] = nn.top_k(logits, top_k)
# Since the targets have multiple labels, setup the target probabilities
# as 1.0/n_labels for each of the labels.
target_one_hot = array_ops.one_hot(
indices=targets, depth=n_classes, on_value=1.0 / n_labels)
target_one_hot = math_ops.reduce_sum(
input_tensor=target_one_hot,
reduction_indices=[1])
loss = math_ops.reduce_mean(
nn.softmax_cross_entropy_with_logits(logits, target_one_hot))
return predictions, loss, None
elif mode == estimator.ModeKeys.INFER:
logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
biases)
predictions = {}
predictions[_PROBABILITIES] = nn.softmax(logits)
predictions[_CLASSES] = math_ops.argmax(logits, 1)
_, predictions[_TOP_K] = nn.top_k(logits, top_k)
return predictions, None, None
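Worked example of the eval-target construction above for n_classes=4 and n_labels=2: each of an example's labels contributes `1.0 / n_labels` of probability mass, and the reduce_sum over the label axis merges them into one soft target row (NumPy stand-in for the TF ops):

import numpy as np

targets = np.array([[0, 2]])          # one example with labels {0, 2}
one_hot = np.zeros((1, 2, 4))         # [batch, n_labels, n_classes]
for i, label in enumerate(targets[0]):
    one_hot[0, i, label] = 1.0 / 2    # on_value = 1.0 / n_labels
print(one_hot.sum(axis=1))            # [[0.5  0.   0.5  0. ]]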
def _reshape_targets(targets):
if targets is None:
return None
check_shape_op = control_flow_ops.Assert(
math_ops.less_equal(array_ops.rank(targets), 2),
["target's should be either [batch_size, n_labels] or [batch_size]"])
with ops.control_dependencies([check_shape_op]):
targets = array_ops.reshape(
targets, shape=[array_ops.shape(targets)[0], -1])
return targets
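The reshape contract above, illustrated: rank-1 targets become a `[batch_size, 1]` column, while already rank-2 targets pass through with their trailing dimension intact (NumPy stand-in for the TF ops):

import numpy as np

def reshape_targets(targets):
    assert targets.ndim <= 2, 'targets should be rank 1 or 2'
    return targets.reshape(targets.shape[0], -1)

print(reshape_targets(np.array([3, 5, 7])))         # [[3] [5] [7]]
print(reshape_targets(np.array([[0, 1], [1, 0]])))  # unchanged, shape (2, 2)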
def _top_k_fn_wrapper(metric_fn, k):
def wrap_func(predictions, labels):
return metric_fn(predictions, _reshape_targets(labels), k=k)
wrap_func.__name__ = metric_fn.__name__
return wrap_func
class _DNNSampledSoftmaxClassifier(trainable.Trainable, evaluable.Evaluable):
"""A classifier for TensorFlow DNN models.
Example:
```python
legos = sparse_column_with_hash_bucket(column_name="legos",
hash_bucket_size=1000)
watched_videos = sparse_column_with_hash_bucket(
column_name="watched_videos",
hash_bucket_size=20000)
legos_emb = embedding_column(sparse_id_column=legos, dimension=16,
combiner="sum")
watched_videos_emb = embedding_column(sparse_id_column=watched_videos,
dimension=256,
combiner="sum")
estimator = DNNSampledSoftmaxClassifier(
n_classes=500000, n_samples=10000, n_labels=5,
feature_columns=[legos_emb, watched_videos_emb],
hidden_units=[1024, 512, 256])
# Or estimator using the Adam optimizer with dropout.
estimator = DNNSampledSoftmaxClassifier(
feature_columns=[education_emb, occupation_emb],
hidden_units=[1024, 512, 256],
optimizer=tf.train.ProximalAdagradOptimizer(
learning_rate=0.1),
dropout=0.1)
# Input builders
def input_fn_train(): # returns x, Y
pass
estimator.fit(input_fn=input_fn_train)
def input_fn_eval(): # returns x, Y
pass
estimator.evaluate(input_fn=input_fn_eval)
estimator.predict(x=x)
```
Input of `fit` and `evaluate` should have following features,
otherwise there will be a `KeyError`:
* for each `column` in `feature_columns`:
- if `column` is a `SparseColumn`, a feature with `key=column.name`
whose `value` is a `SparseTensor`.
- if `column` is a `EmbeddingColumn`, a feature with `key=column.name`
whose `value` is a `SparseTensor`.
- if `column` is a `WeightedSparseColumn`, two features: the first with
`key` the id column name, the second with `key` the weight column name.
Both features' `value` must be a `SparseTensor`.
- if `column` is a `RealValuedColumn`, a feature with `key=column.name`
whose `value` is a `Tensor`.
"""
def __init__(self,
hidden_units,
feature_columns,
n_classes,
n_samples,
n_labels=1,
top_k=1,
model_dir=None,
optimizer=None,
dropout=None,
gradient_clip_norm=None,
config=None,
feature_engineering_fn=None):
"""Initializes a DNNSampledSoftmaxClassifier instance.
Args:
hidden_units: List of hidden units per layer. All layers are fully
connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
has 32.
feature_columns: An iterable containing all the feature columns used by
the model. All items in the set should be instances of classes derived
from `FeatureColumn`.
n_classes: number of target classes. It must be greater than 2.
n_samples: number of sampled target classes. Needs to be tuned; a good
starting point could be 2% of n_classes.
n_labels: number of labels in each example.
top_k: The number of classes to predict.
model_dir: Directory to save model parameters, graph and etc. This can
also be used to load checkpoints from the directory into a estimator to
continue training a previously saved model.
optimizer: An instance of `tf.Optimizer` used to train the model. If
`None`, will use an Adagrad optimizer.
dropout: When not `None`, the probability we will drop out a given
coordinate.
gradient_clip_norm: A float > 0. If provided, gradients are
clipped to their global norm with this clipping ratio. See
tf.clip_by_global_norm for more details.
config: `RunConfig` object to configure the runtime settings.
feature_engineering_fn: Feature engineering function. Takes features and
targets which are the output of `input_fn` and
returns features and targets which will be fed
into the model.
Returns:
A `DNNSampledSoftmaxClassifier` estimator.
Raises:
ValueError: If n_classes <= 2.
ValueError: If n_classes < n_samples.
ValueError: If n_classes < n_labels.
"""
# Validate all the inputs.
if n_classes <= 2:
raise ValueError("n_classes should be greater than 2. For n_classes <= 2,"
" use DNNClassifier.")
if n_classes < n_samples:
raise ValueError("n_classes (%d) should be greater than n_samples (%d)." %
(n_classes, n_samples))
if n_classes < n_labels:
raise ValueError("n_classes (%d) should be greater than n_labels"
" (%d)." % (n_classes, n_labels))
self._top_k = top_k
self._feature_columns = feature_columns
assert self._feature_columns
self._model_dir = model_dir or tempfile.mkdtemp()
# Build the estimator with _dnn_sampled_softmax_classifier_model_fn.
self._estimator = estimator.Estimator(
model_fn=_dnn_sampled_softmax_classifier_model_fn,
model_dir=self._model_dir,
config=config,
params={
"hidden_units": hidden_units,
"feature_columns": feature_columns,
"n_classes": n_classes,
"n_samples": n_samples,
"n_labels": n_labels,
"top_k": top_k,
"optimizer": optimizer or _get_default_optimizer(),
"dropout": dropout,
"gradient_clip_norm": gradient_clip_norm,
"num_ps_replicas": config.num_ps_replicas if config else 0
},
feature_engineering_fn=feature_engineering_fn)
def get_estimator(self):
return self._estimator
def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
monitors=None, max_steps=None):
"""See trainable.Trainable."""
return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
batch_size=batch_size, monitors=monitors,
max_steps=max_steps)
def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None,
batch_size=None, steps=None, metrics=None, name=None,
range_k=None):
# pylint: disable=g-doc-args,g-doc-return-or-yield
"""See evaluable.Evaluable for a description of the Args.
Calculates the following metrics by default:
loss
average_precision@top_k: see
https://en.wikipedia.org/wiki/Information_retrieval#Average_precision
for k in range_k:
precision@k and recall@k
range_k: A list of numbers where precision and recall have to be obtained.
E.g. range_k=[1, 5] will calculate precision@1, precision@5,
recall@1 and recall@5. If None, defaults to [1, top_k].
"""
if not metrics:
metrics = {}
metrics.update({
"average_precision_at_%d" % self._top_k: metric_spec.MetricSpec(
metric_fn=_top_k_fn_wrapper(
metric_ops.streaming_sparse_average_precision_at_k,
k=self._top_k),
prediction_key=_PROBABILITIES)
})
if range_k is None:
if self._top_k > 1:
range_k = [1, self._top_k]
else:
range_k = [1]
for k in range_k:
metrics.update({
"precision_at_%d" % k: metric_spec.MetricSpec(
metric_fn=_top_k_fn_wrapper(
metric_ops.streaming_sparse_precision_at_k, k=k),
prediction_key=_PROBABILITIES,)
})
metrics.update({
"recall_at_%d" % k: metric_spec.MetricSpec(
metric_fn=_top_k_fn_wrapper(
metric_ops.streaming_sparse_recall_at_k, k=k),
prediction_key=_PROBABILITIES,)
})
return self._estimator.evaluate(x=x, y=y, input_fn=input_fn,
feed_fn=feed_fn, batch_size=batch_size,
steps=steps, metrics=metrics, name=name)
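Worked example of the default metrics described in this docstring, for a single example whose true labels are {1, 4} and whose classes ranked by predicted probability are [4, 0, 1]:

true_labels = {1, 4}
ranked = [4, 0, 1]                # classes sorted by predicted score
for k in (1, 3):
    hits = len(set(ranked[:k]) & true_labels)
    print('precision@%d = %.3f, recall@%d = %.3f'
          % (k, hits / float(k), k, hits / float(len(true_labels))))
# precision@1 = 1.000, recall@1 = 0.500
# precision@3 = 0.667, recall@3 = 1.000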
def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False,
get_top_k=False):
"""Returns predicted classes for given features.
Args:
x: features.
input_fn: Input function. If set, x must be None.
batch_size: Override default batch size.
as_iterable: If True, return an iterable which keeps yielding predictions
for each example until inputs are exhausted. Note: The inputs must
terminate if you want the iterable to terminate (e.g. be sure to pass
num_epochs=1 if you are using something like read_batch_features).
get_top_k: if set to True, returns the top k classes; otherwise returns
the top class.
Returns:
Numpy array of predicted classes (or an iterable of predicted classes if
as_iterable is True).
"""
if get_top_k:
key = _TOP_K
else:
key = _CLASSES
preds = self._estimator.predict(x=x, input_fn=input_fn,
batch_size=batch_size, outputs=[key],
as_iterable=as_iterable)
if as_iterable:
return _as_iterable(preds, output=key)
return preds[key]
def predict_proba(self, x=None, input_fn=None, batch_size=None,
as_iterable=False):
"""Returns prediction probabilities for given features.
Args:
x: features.
input_fn: Input function. If set, x must be None.
batch_size: Override default batch size.
as_iterable: If True, return an iterable which keeps yielding predictions
for each example until inputs are exhausted. Note: The inputs must
terminate if you want the iterable to terminate (e.g. be sure to pass
num_epochs=1 if you are using something like read_batch_features).
Returns:
Numpy array of predicted probabilities (or an iterable of predicted
probabilities if as_iterable is True).
"""
preds = self._estimator.predict(x=x, input_fn=input_fn,
batch_size=batch_size,
outputs=[_PROBABILITIES],
as_iterable=as_iterable)
if as_iterable:
return _as_iterable(preds, output=_PROBABILITIES)
return preds[_PROBABILITIES]
def export(self, export_dir, signature_fn=None,
input_fn=None, default_batch_size=1,
exports_to_keep=None):
"""Exports inference graph into given dir.
Args:
export_dir: A string containing a directory to write the exported graph
and checkpoints.
signature_fn: Function that returns a default signature and a named
signature map, given `Tensor` of `Example` strings, `dict` of `Tensor`s
for features and `Tensor` or `dict` of `Tensor`s for predictions.
input_fn: If `use_deprecated_input_fn` is true, then a function that given
`Tensor` of `Example` strings, parses it into features that are then
passed to the model. Otherwise, a function that takes no argument and
returns a tuple of (features, targets), where features is a dict of
string key to `Tensor` and targets is a `Tensor` that's currently not
used (and so can be `None`).
default_batch_size: Default batch size of the `Example` placeholder.
exports_to_keep: Number of exports to keep.
Returns:
The string path to the exported directory. NB: this functionality was
added ca. 2016/09/25; clients that depend on the return value may need
to handle the case where this function returns None because subclasses
are not returning a value.
"""
def default_input_fn(unused_estimator, examples):
return layers.parse_feature_columns_from_examples(
examples, self._feature_columns)
return self._estimator.export(export_dir=export_dir,
signature_fn=signature_fn,
input_fn=input_fn or default_input_fn,
default_batch_size=default_batch_size,
exports_to_keep=exports_to_keep)
def get_variable_names(self):
return self._estimator.get_variable_names()
@property
def model_dir(self):
return self._model_dir

View File

@ -1,459 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
"""Tests for DNNSampledSoftmaxClassifier estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import tempfile
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import dnn_sampled_softmax_classifier
from tensorflow.python.ops import math_ops
class DNNSampledSoftmaxClassifierTest(tf.test.TestCase):
def testMultiClass(self):
"""Tests the following.
1. Tests fit() and evaluate() calls.
2. Tests the use of a non default optimizer.
3. Tests the output of get_variable_names().
Note that the training output is not verified because it is flaky with the
Iris dataset.
"""
def _iris_input_fn():
iris = tf.contrib.learn.datasets.load_iris()
return {
'feature': tf.constant(iris.data, dtype=tf.float32)
}, tf.constant(iris.target, shape=[150, 1], dtype=tf.int64)
cont_features = [
tf.contrib.layers.real_valued_column('feature', dimension=4)]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=1,
n_labels=1,
feature_columns=cont_features,
hidden_units=[3, 3])
classifier.fit(input_fn=_iris_input_fn, steps=5)
classifier.evaluate(input_fn=_iris_input_fn, steps=1)
var_names = classifier.get_variable_names()
self.assertGreater(len(var_names), 6)
def testNonDictFeatures(self):
"""Tests non-dictionary features runs without error."""
def _iris_input_fn():
iris = tf.contrib.learn.datasets.load_iris()
return (tf.constant(
iris.data, dtype=tf.float32), tf.constant(
iris.target, shape=[150, 1], dtype=tf.int64))
cont_features = [tf.contrib.layers.real_valued_column('', dimension=4)]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=1,
n_labels=1,
feature_columns=cont_features,
hidden_units=[3, 3])
classifier.fit(input_fn=_iris_input_fn, steps=5)
classifier.evaluate(input_fn=_iris_input_fn, steps=1)
def testOneDimensionTargets(self):
"""Tests one dimensional targets runs without error."""
def _input_fn():
return {
'feature': tf.constant(
[1, 1, 1], dtype=tf.float32)
}, tf.constant(
[3, 5, 7], dtype=tf.int64)
cont_features = [
tf.contrib.layers.real_valued_column(
'feature', dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=10,
n_samples=1,
n_labels=1,
feature_columns=cont_features,
hidden_units=[3, 3])
classifier.fit(input_fn=_input_fn, steps=5)
classifier.evaluate(input_fn=_input_fn, steps=1)
def testWrongDimensionTargets(self):
"""Tests one dimensional targets runs without error."""
def _input_fn():
return {
'feature': tf.constant(
[1, 1, 1], dtype=tf.float32)
}, tf.constant(
[[[3, 5, 7]]], dtype=tf.int64)
cont_features = [
tf.contrib.layers.real_valued_column(
'feature', dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=10,
n_samples=1,
n_labels=1,
feature_columns=cont_features,
hidden_units=[3, 3])
with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, 'target'):
classifier.fit(input_fn=_input_fn, steps=5)
def testTrainWithPartitionedVariables(self):
"""Tests the following.
1. Tests training with partitioned variables.
2. Test that the model actually trains.
3. Tests the output of evaluate() and predict().
"""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
# The given hash_bucket_size results in variables larger than the
# default min_slice_size attribute, so the variables are partitioned.
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=2e7)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4],
# Because we did not start a distributed cluster, we need to pass an
# empty ClusterSpec, otherwise the device_setter will look for
# distributed jobs, such as "/job:ps" which are not present.
config=tf.contrib.learn.RunConfig(
num_ps_replicas=2, cluster_spec=tf.train.ClusterSpec({}),
tf_random_seed=5))
# Test that the model actually trains.
classifier.fit(input_fn=_input_fn, steps=50)
evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1)
self.assertGreater(evaluate_output['precision_at_1'], 0.6)
self.assertGreater(evaluate_output['recall_at_1'], 0.6)
# Test the output of predict()
predict_output = classifier.predict(input_fn=_input_fn)
self.assertListEqual([3], list(predict_output.shape))
# TODO(dnivara): Setup this test such that it is not flaky and predict() and
# evaluate() outputs can be tested.
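Back-of-envelope for the partitioning comment in this test, assuming float32 weights and the 64MB `min_slice_size` set by the model_fn's input-layer partitioner in the previous file:

buckets, dim, bytes_per_float = int(2e7), 1, 4
size_mb = buckets * dim * bytes_per_float / float(1 << 20)
print('%.1f MB' % size_mb)   # ~76.3 MB > 64 MB, so the embedding weights
                             # are split across the 2 parameter servers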
def testTrainSaveLoad(self):
"""Tests that ensure that you can save and reload a trained model."""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=10)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
model_dir = tempfile.mkdtemp()
classifier1 = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
model_dir=model_dir,
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4])
classifier1.fit(input_fn=_input_fn, steps=1)
predict_output1 = classifier1.predict(input_fn=_input_fn)
del classifier1
classifier2 = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
model_dir=model_dir,
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4])
predict_output2 = classifier2.predict(input_fn=_input_fn)
self.assertEqual(list(predict_output1), list(predict_output2))
def testCustomOptimizerByObject(self):
"""Tests the use of custom optimizer."""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=20)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4],
optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
config=tf.contrib.learn.RunConfig(tf_random_seed=5))
# Test that the model actually trains.
classifier.fit(input_fn=_input_fn, steps=50)
evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1)
self.assertGreater(evaluate_output['precision_at_1'], 0.9)
self.assertGreater(evaluate_output['recall_at_1'], 0.9)
# Test the output of predict()
predict_output = classifier.predict(input_fn=_input_fn)
self.assertListEqual([1, 0, 0], list(predict_output))
def testCustomOptimizerByFunction(self):
"""Tests the use of custom optimizer."""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
def _optimizer_exp_decay():
global_step = tf.contrib.framework.get_global_step()
learning_rate = tf.train.exponential_decay(learning_rate=0.01,
global_step=global_step,
decay_steps=100,
decay_rate=0.001)
return tf.train.AdagradOptimizer(learning_rate=learning_rate)
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=20)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4],
optimizer=_optimizer_exp_decay,
config=tf.contrib.learn.RunConfig(tf_random_seed=5))
# Test that the model actually trains.
classifier.fit(input_fn=_input_fn, steps=50)
evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1)
self.assertGreater(evaluate_output['precision_at_1'], 0.6)
self.assertGreater(evaluate_output['recall_at_1'], 0.6)
def testExport(self):
"""Tests that export model for servo works."""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=100)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4])
export_dir = tempfile.mkdtemp()
classifier.fit(input_fn=_input_fn, steps=50)
classifier.export(export_dir)
def testPredictAsIterable(self):
"""Tests predict() and predict_proba() call with as_iterable set to True."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[.9], [.1], [.1]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=20)
feature_columns = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1),
tf.contrib.layers.real_valued_column('age')
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=feature_columns,
hidden_units=[4, 4])
classifier.fit(input_fn=_input_fn, steps=1)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
# Test the output of predict() and predict_proba() with as_iterable=True
predictions = list(
classifier.predict(input_fn=predict_input_fn, as_iterable=True))
predictions_proba = list(
classifier.predict_proba(input_fn=predict_input_fn, as_iterable=True))
self.assertTrue(np.array_equal(predictions,
np.argmax(predictions_proba, 1)))
def testCustomMetrics(self):
"""Tests the use of custom metric."""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[1], [0], [0]], dtype=tf.int64)
return features, target
def _my_metric_op(predictions, targets):
"""Simply multiplies predictions and targets to return [1, 0 , 0]."""
prediction_classes = math_ops.argmax(predictions, 1)
return tf.mul(prediction_classes, tf.reshape(targets, [-1]))
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=20)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=1,
feature_columns=embedding_features,
hidden_units=[4, 4],
optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
config=tf.contrib.learn.RunConfig(tf_random_seed=5))
# Test that the model actually trains.
classifier.fit(input_fn=_input_fn, steps=50)
metrics = {('my_metric', 'probabilities'): _my_metric_op}
evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1,
metrics=metrics)
self.assertListEqual([1, 0, 0], list(evaluate_output['my_metric']))
def testMultiLabelTopKWithCustomMetrics(self):
"""Tests the cases where n_labels>1 top_k>1 and custom metrics on top_k."""
def _input_fn():
features = {
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[0, 1], [0, 1], [0, 1]], dtype=tf.int64)
return features, target
def _my_metric_op(predictions, targets):
"""Simply adds the predictions and targets."""
return tf.add(math_ops.to_int64(predictions), targets)
sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket(
'language', hash_bucket_size=20)
embedding_features = [
tf.contrib.layers.embedding_column(sparse_column, dimension=1)
]
classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier(
n_classes=3,
n_samples=2,
n_labels=2,
top_k=2,
feature_columns=embedding_features,
hidden_units=[4, 4],
optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
config=tf.contrib.learn.RunConfig(tf_random_seed=5))
classifier.fit(input_fn=_input_fn, steps=50)
# evaluate() without custom metrics.
evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1)
self.assertGreater(evaluate_output['precision_at_1'], 0.4)
self.assertGreater(evaluate_output['recall_at_1'], 0.4)
self.assertGreater(evaluate_output['precision_at_2'], 0.4)
self.assertGreater(evaluate_output['recall_at_2'], 0.4)
self.assertGreater(evaluate_output['average_precision_at_2'], 0.4)
# evaluate() with custom metrics.
metrics = {('my_metric', 'top_k'): _my_metric_op}
evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1,
metrics=metrics)
# This test's output is flaky so just testing that 'my_metric' is indeed
# part of the evaluate_output.
self.assertTrue('my_metric' in evaluate_output)
# predict() with top_k.
predict_output = classifier.predict(input_fn=_input_fn, get_top_k=True)
self.assertListEqual([3, 2], list(predict_output.shape))
# TODO(dnivara): Setup this test such that it is not flaky and predict() and
# evaluate() outputs can be tested.
if __name__ == '__main__':
tf.test.main()

View File

@ -114,11 +114,13 @@ class DNNClassifierTest(tf.test.TestCase):
"""Tests binary classification using tensor data as input."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[.8], [0.2], [.1]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([[1], [0], [0]], dtype=tf.int32)
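Note the new pattern here: the `SparseTensor`'s values, not just the dense `age` feature, are wrapped in `tf.train.limit_epochs`, so the sparse column's input also raises `OutOfRangeError` after `num_epochs` and iterable `predict` calls can terminate. A minimal sketch using the same literals (assumes this era's queue-based input pipeline):

import tensorflow as tf

values = tf.train.limit_epochs(['en', 'fr', 'zh'], num_epochs=1)
language = tf.SparseTensor(values=values,
                           indices=[[0, 0], [0, 1], [2, 0]],
                           shape=[3, 2])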
@ -149,11 +151,13 @@ class DNNClassifierTest(tf.test.TestCase):
"""Tests binary classification with float labels."""
def _input_fn_float_label(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[50], [20], [10]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[50], [20], [10]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
target = tf.constant([[0.8], [0.], [0.2]], dtype=tf.float32)
return features, target
@ -334,11 +338,13 @@ class DNNClassifierTest(tf.test.TestCase):
"""Tests predict and predict_prob methods with as_iterable=False."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([[1], [0], [0]], dtype=tf.int32)
@ -370,11 +376,13 @@ class DNNClassifierTest(tf.test.TestCase):
"""Tests predict and predict_prob methods with as_iterable=True."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([[1], [0], [0]], dtype=tf.int32)
@ -407,15 +415,19 @@ class DNNClassifierTest(tf.test.TestCase):
def testCustomMetrics(self):
"""Tests custom evaluation metrics."""
def _input_fn_train():
def _input_fn(num_epochs=None):
# Create 4 rows, one of them (y = x), three of them (y=Not(x))
target = tf.constant([[1], [0], [0], [0]])
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
features = {
'x': tf.train.limit_epochs(
tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs),
}
return features, target
def _my_metric_op(predictions, targets):
# For the case of binary classification, the 2nd column of "predictions"
# denotes the model predictions.
targets = tf.to_float(targets)
predictions = tf.slice(predictions, [0, 1], [-1, 1])
targets = math_ops.cast(targets, predictions.dtype)
return tf.reduce_sum(tf.mul(predictions, targets))
@ -425,9 +437,9 @@ class DNNClassifierTest(tf.test.TestCase):
hidden_units=[3, 3],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
classifier.fit(input_fn=_input_fn_train, steps=100)
classifier.fit(input_fn=_input_fn, steps=100)
scores = classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={
'my_accuracy': MetricSpec(
@ -443,7 +455,8 @@ class DNNClassifierTest(tf.test.TestCase):
self.assertTrue(
set(['loss', 'my_accuracy', 'my_precision', 'my_metric'
]).issubset(set(scores.keys())))
predictions = classifier.predict(input_fn=_input_fn_train)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(classifier.predict(input_fn=predict_input_fn)))
self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions),
scores['my_accuracy'])
@ -451,7 +464,7 @@ class DNNClassifierTest(tf.test.TestCase):
# "probabilities".
with self.assertRaisesRegexp(KeyError, 'bad_type'):
classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={
'bad_name': MetricSpec(
@ -462,11 +475,13 @@ class DNNClassifierTest(tf.test.TestCase):
"""Tests that insures you can save and reload a trained model."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([[1], [0], [0]], dtype=tf.int32)
@ -485,7 +500,8 @@ class DNNClassifierTest(tf.test.TestCase):
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
classifier.fit(input_fn=_input_fn, steps=100)
predictions1 = classifier.predict(input_fn=_input_fn)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions1 = classifier.predict(input_fn=predict_input_fn)
del classifier
classifier2 = tf.contrib.learn.DNNClassifier(
@ -494,18 +510,20 @@ class DNNClassifierTest(tf.test.TestCase):
feature_columns=feature_columns,
hidden_units=[3, 3],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
predictions2 = classifier2.predict(input_fn=_input_fn)
predictions2 = classifier2.predict(input_fn=predict_input_fn)
self.assertEqual(list(predictions1), list(predictions2))
def testTrainWithPartitionedVariables(self):
"""Tests training with partitioned variables."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([[1], [0], [0]], dtype=tf.int32)
@ -636,11 +654,13 @@ class DNNRegressorTest(tf.test.TestCase):
"""Tests regression using tensor data as input."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[.8], [.15], [0.]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([1., 0., 0.2], dtype=tf.float32)
@ -756,11 +776,13 @@ class DNNRegressorTest(tf.test.TestCase):
target = [1., 0., 0.2]
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant(target, dtype=tf.float32)
@ -788,11 +810,13 @@ class DNNRegressorTest(tf.test.TestCase):
target = [1., 0., 0.2]
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant(target, dtype=tf.float32)
@ -819,10 +843,13 @@ class DNNRegressorTest(tf.test.TestCase):
def testCustomMetrics(self):
"""Tests custom evaluation metrics."""
def _input_fn_train():
def _input_fn(num_epochs=None):
# Create 4 rows: one with y = x, three with y = Not(x).
target = tf.constant([[1.], [0.], [0.], [0.]])
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
features = {
'x': tf.train.limit_epochs(
tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs),
}
return features, target
def _my_metric_op(predictions, targets):
@ -833,9 +860,9 @@ class DNNRegressorTest(tf.test.TestCase):
hidden_units=[3, 3],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
regressor.fit(input_fn=_input_fn_train, steps=100)
regressor.fit(input_fn=_input_fn, steps=100)
scores = regressor.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=1,
metrics={
'my_error': tf.contrib.metrics.streaming_mean_squared_error,
@ -844,28 +871,31 @@ class DNNRegressorTest(tf.test.TestCase):
self.assertIn('loss', set(scores.keys()))
self.assertIn('my_error', set(scores.keys()))
self.assertIn('my_metric', set(scores.keys()))
predictions = regressor.predict(input_fn=_input_fn_train)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(regressor.predict(input_fn=predict_input_fn)))
self.assertAlmostEqual(
_sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
scores['my_error'])
# Tests that when the key is a tuple, an error is raised.
with self.assertRaises(TypeError):
with self.assertRaises(KeyError):
regressor.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=1,
metrics={('my_error', 'predictions'
): tf.contrib.metrics.streaming_mean_squared_error})
metrics={('my_error', 'predictions'):
tf.contrib.metrics.streaming_mean_squared_error})
def testTrainSaveLoad(self):
"""Tests that insures you can save and reload a trained model."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([1., 0., 0.2], dtype=tf.float32)
@ -900,11 +930,13 @@ class DNNRegressorTest(tf.test.TestCase):
"""Tests training with partitioned variables."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([1., 0., 0.2], dtype=tf.float32)
@ -936,11 +968,13 @@ class DNNRegressorTest(tf.test.TestCase):
"""Tests that we can disable centered bias."""
def _input_fn(num_epochs=None):
features = {
'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]),
num_epochs=num_epochs),
'language': tf.SparseTensor(values=['en', 'fr', 'zh'],
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
'age': tf.train.limit_epochs(
tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs),
'language': tf.SparseTensor(
values=tf.train.limit_epochs(
['en', 'fr', 'zh'], num_epochs=num_epochs),
indices=[[0, 0], [0, 1], [2, 0]],
shape=[3, 2])
}
return features, tf.constant([1., 0., 0.2], dtype=tf.float32)

View File

@ -27,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import rnn
@ -119,6 +120,55 @@ def _select_last_activations(activations, sequence_lengths):
return last_activations
def _concatenate_context_input(sequence_input, context_input):
"""Replicates `context_input` accross all timesteps of `sequence_input`.
Expands dimension 1 of `context_input` then tiles it `sequence_length` times.
This value is appended to `sequence_input` on dimension 2 and the result is
returned.
Args:
sequence_input: a `Tensor` of dtype `float32` and shape `[batch_size,
padded_length, d0]`.
context_input: a `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
Returns:
A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
d0 + d1]`.
Raises:
ValueError: if `sequence_input` does not have rank 3 or `context_input` does
not have rank 2.
"""
seq_rank_check = check_ops.assert_rank(
sequence_input,
3,
message='sequence_input must have rank 3',
data=[array_ops.shape(sequence_input)])
seq_type_check = check_ops.assert_type(
sequence_input,
dtypes.float32,
message='sequence_input must have dtype float32; got {}.'.format(
sequence_input.dtype))
ctx_rank_check = check_ops.assert_rank(
context_input,
2,
message='context_input must have rank 2',
data=[array_ops.shape(context_input)])
ctx_type_check = check_ops.assert_type(
context_input,
dtypes.float32,
message='context_input must have dtype float32; got {}.'.format(
context_input.dtype))
with ops.control_dependencies(
[seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
padded_length = array_ops.shape(sequence_input)[1]
tiled_context_input = array_ops.tile(
array_ops.expand_dims(context_input, 1),
array_ops.concat(0, [[1], [padded_length], [1]]))
return array_ops.concat(2, [sequence_input, tiled_context_input])
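# For intuition, a NumPy sketch of the same shape arithmetic; the sizes
# below are illustrative and not part of this module.
import numpy as np

_sequence_input = np.zeros([2, 3, 4])  # [batch_size, padded_length, d0]
_context_input = np.ones([2, 5])       # [batch_size, d1]
_tiled = np.tile(_context_input[:, np.newaxis, :], [1, 3, 1])   # [2, 3, 5]
_combined = np.concatenate([_sequence_input, _tiled], axis=2)   # [2, 3, 4 + 5]
assert _combined.shape == (2, 3, 9)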
@six.add_metaclass(abc.ABCMeta)
class _DynamicRNNEstimator(estimator.BaseEstimator):
"""Estimator that uses a dynamic RNN for sequences."""
@ -127,10 +177,11 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
cell,
target_column,
optimizer,
sequence_feature_columns,
context_feature_columns=None,
model_dir=None,
config=None,
gradient_clipping_norm=None,
inputs_key='inputs',
sequence_length_key='sequence_length',
initial_state_key='initial_state',
dtype=None,
@ -145,13 +196,18 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
target_column: an initialized `TargetColumn`, used to calculate loss and
metrics.
optimizer: an initialized `tensorflow.Optimizer`.
sequence_feature_columns: An iterable containing all the feature columns
describing sequence features. All items in the set should be instances
of classes derived from `FeatureColumn`.
context_feature_columns: An iterable containing all the feature columns
describing context features, i.e. features that apply across all time
steps. All items in the set should be instances of classes derived from
`FeatureColumn`.
model_dir: The directory in which to save and restore the model graph,
parameters, etc.
config: A `RunConfig` instance.
gradient_clipping_norm: parameter used for gradient clipping. If `None`,
then no clipping is performed.
inputs_key: the key for input values in the features dict passed to
`fit()`.
sequence_length_key: the key for the sequence length tensor in the
features dict passed to `fit()`.
initial_state_key: the key for the initial state tensor in the features dict passed to
@ -168,14 +224,20 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
targets which are the output of `input_fn` and
returns features and targets which will be fed
into the model.
Raises:
ValueError: if `sequence_feature_columns` is `None` or empty.
"""
super(_DynamicRNNEstimator, self).__init__(
model_dir=model_dir, config=config)
# TODO(jamieas): consider supporting models with only context features.
if not sequence_feature_columns:
raise ValueError('sequence_feature_columns must be a non-empty list.')
self._cell = cell
self._target_column = target_column
self._optimizer = optimizer
self._context_feature_columns = context_feature_columns
self._sequence_feature_columns = sequence_feature_columns
self._gradient_clipping_norm = gradient_clipping_norm
self._inputs_key = inputs_key
self._sequence_length_key = sequence_length_key
self._initial_state_key = initial_state_key
self._dtype = dtype or dtypes.float32
@ -186,7 +248,29 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
feature_engineering_fn or
(lambda features, targets: (features, targets)))
def _construct_rnn(self, features):
def _get_model_input(self, features, weight_collections=None, scope=None):
# TODO(jamieas): add option to use context to construct initial state rather
# than appending it to sequence input.
initial_state = features.get(self._initial_state_key)
sequence_input = layers.sequence_input_from_feature_columns(
columns_to_tensors=features,
feature_columns=self._sequence_feature_columns,
weight_collections=weight_collections,
scope=scope)
if self._context_feature_columns is not None:
context_input = layers.input_from_feature_columns(
columns_to_tensors=features,
feature_columns=self._context_feature_columns,
weight_collections=weight_collections,
scope=scope)
sequence_input = _concatenate_context_input(sequence_input, context_input)
return initial_state, sequence_input
def _construct_rnn(self, initial_state, sequence_input):
"""Apply an RNN to `features`.
The `features` dict must contain `self._inputs_key`, and the corresponding
@ -201,28 +285,20 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
`self._dtype`.
Args:
features: a `dict` containing the input for the RNN and (optionally) an
initial state and information about sequence lengths.
initial_state: the initial state to pass to the RNN. If `None`, the
default starting state for `self._cell` is used.
sequence_input: a `Tensor` with shape `[batch_size, padded_length, d]`
that will be passed as input to the RNN.
Returns:
activations: the output of the RNN, projected to the appropriate number of
dimensions.
final_state: the final state output by the RNN.
Raises:
KeyError: if `features` does not contain `self._inputs_key`.
"""
with ops.name_scope('RNN'):
inputs = features.get(self._inputs_key)
if inputs is None:
raise KeyError('features must contain the key {}'.format(
self._inputs_key))
if inputs.dtype != self._dtype:
inputs = math_ops.cast(inputs, self._dtype)
initial_state = features.get(self._initial_state_key)
rnn_outputs, final_state = rnn.dynamic_rnn(
cell=self._cell,
inputs=inputs,
inputs=sequence_input,
initial_state=initial_state,
dtype=self._dtype,
parallel_iterations=self._parallel_iterations,
@ -320,26 +396,26 @@ class _DynamicRNNEstimator(estimator.BaseEstimator):
def _get_train_ops(self, features, targets):
with ops.name_scope(self._name):
if isinstance(features, ops.Tensor):
features = {self._inputs_key: features}
activations, _ = self._construct_rnn(features)
features, targets = self._feature_engineering_fn(features, targets)
initial_state, sequence_input = self._get_model_input(features)
activations, _ = self._construct_rnn(initial_state, sequence_input)
loss = self._activations_to_loss(features, activations, targets)
train_op = self._loss_to_train_op(loss)
return train_op, loss
def _get_eval_ops(self, features, targets, metrics):
with ops.name_scope(self._name):
if isinstance(features, ops.Tensor):
features = {self._inputs_key: features}
activations, _ = self._construct_rnn(features)
features, targets = self._feature_engineering_fn(features, targets)
initial_state, sequence_input = self._get_model_input(features)
activations, _ = self._construct_rnn(initial_state, sequence_input)
return self._activations_to_eval_ops(features, activations, targets,
metrics)
def _get_predict_ops(self, features):
with ops.name_scope(self._name):
if isinstance(features, ops.Tensor):
features = {self._inputs_key: features}
activations, state = self._construct_rnn(features)
features, _ = self._feature_engineering_fn(features, {})
initial_state, sequence_input = self._get_model_input(features)
activations, state = self._construct_rnn(initial_state, sequence_input)
predictions = self._activations_to_predictions(features, activations)
return {'predictions': predictions, 'state': state}
@ -362,7 +438,7 @@ class _MultiValueRNNEstimator(_DynamicRNNEstimator):
activations_shape = array_ops.shape(activations)
flattened_activations = array_ops.reshape(activations,
[-1, activations_shape[2]])
predictions = self._target_column.activations_to_predictions(
predictions = self._target_column.logits_to_predictions(
flattened_activations, proba=False)
reshaped_predictions = array_ops.reshape(
predictions, [activations_shape[0], activations_shape[1], -1])
@ -392,7 +468,7 @@ class _SingleValueRNNEstimator(_DynamicRNNEstimator):
with ops.name_scope('activations_to_predictions'):
sequence_lengths = features.get(self._sequence_length_key)
last_activations = _select_last_activations(activations, sequence_lengths)
return self._target_column.activations_to_predictions(
return self._target_column.logits_to_predictions(
last_activations, proba=False)
def _activations_to_eval_ops(self, features, activations, targets, metrics):
@ -469,6 +545,8 @@ def _get_rnn_cell(cell_type, num_units, num_layers):
def multi_value_rnn_regressor(num_units,
sequence_feature_columns,
context_feature_columns=None,
cell_type='basic_rnn',
cell_dtype=dtypes.float32,
num_rnn_layers=1,
@ -482,6 +560,13 @@ def multi_value_rnn_regressor(num_units,
Args:
num_units: the size of the RNN cells.
sequence_feature_columns: An iterable containing all the feature columns
describing sequence features. All items in the set should be instances
of classes derived from `FeatureColumn`.
context_feature_columns: An iterable containing all the feature columns
describing context features, i.e. features that apply across all time
steps. All items in the set should be instances of classes derived from
`FeatureColumn`.
cell_type: subclass of `RNNCell` or one of 'basic_rnn', 'lstm' or 'gru'.
cell_dtype: the dtype of the state and output for the given `cell_type`.
num_rnn_layers: number of RNN layers.
@ -503,6 +588,8 @@ def multi_value_rnn_regressor(num_units,
return _MultiValueRNNEstimator(cell,
target_column,
optimizer,
sequence_feature_columns,
context_feature_columns,
model_dir,
config,
gradient_clipping_norm,
@ -511,6 +598,8 @@ def multi_value_rnn_regressor(num_units,
def multi_value_rnn_classifier(num_classes,
num_units,
sequence_feature_columns,
context_feature_columns=None,
cell_type='basic_rnn',
cell_dtype=dtypes.float32,
num_rnn_layers=1,
@ -525,6 +614,13 @@ def multi_value_rnn_classifier(num_classes,
Args:
num_classes: the number of classes for categorization.
num_units: the size of the RNN cells.
sequence_feature_columns: An iterable containing all the feature columns
describing sequence features. All items in the set should be instances
of classes derived from `FeatureColumn`.
context_feature_columns: An iterable containing all the feature columns
describing context features, i.e. features that apply across all time
steps. All items in the set should be instances of classes derived from
`FeatureColumn`.
cell_type: subclass of `RNNCell` or one of 'basic_rnn', 'lstm' or 'gru'.
cell_dtype: the dtype of the state and output for the given `cell_type`.
num_rnn_layers: number of RNN layers.
@ -546,6 +642,8 @@ def multi_value_rnn_classifier(num_classes,
return _MultiValueRNNEstimator(cell,
target_column,
optimizer,
sequence_feature_columns,
context_feature_columns,
model_dir,
config,
gradient_clipping_norm,
@ -553,6 +651,8 @@ def multi_value_rnn_classifier(num_classes,
def single_value_rnn_regressor(num_units,
sequence_feature_columns,
context_feature_columns=None,
cell_type='basic_rnn',
cell_dtype=dtypes.float32,
num_rnn_layers=1,
@ -566,6 +666,13 @@ def single_value_rnn_regressor(num_units,
Args:
num_units: the size of the RNN cells.
sequence_feature_columns: An iterable containing all the feature columns
describing sequence features. All items in the set should be instances
of classes derived from `FeatureColumn`.
context_feature_columns: An iterable containing all the feature columns
describing context features, i.e. features that apply across all time
steps. All items in the set should be instances of classes derived from
`FeatureColumn`.
cell_type: subclass of `RNNCell` or one of 'basic_rnn', 'lstm' or 'gru'.
cell_dtype: the dtype of the state and output for the given `cell_type`.
num_rnn_layers: number of RNN layers.
@ -587,6 +694,8 @@ def single_value_rnn_regressor(num_units,
return _SingleValueRNNEstimator(cell,
target_column,
optimizer,
sequence_feature_columns,
context_feature_columns,
model_dir,
config,
gradient_clipping_norm,
@ -595,6 +704,8 @@ def single_value_rnn_regressor(num_units,
def single_value_rnn_classifier(num_classes,
num_units,
sequence_feature_columns,
context_feature_columns=None,
cell_type='basic_rnn',
cell_dtype=dtypes.float32,
num_rnn_layers=1,
@ -609,6 +720,13 @@ def single_value_rnn_classifier(num_classes,
Args:
num_classes: the number of classes for categorization.
num_units: the size of the RNN cells.
sequence_feature_columns: An iterable containing all the feature columns
describing sequence features. All items in the set should be instances
of classes derived from `FeatureColumn`.
context_feature_columns: An iterable containing all the feature columns
describing context features, i.e. features that apply across all time
steps. All items in the set should be instances of classes derived from
`FeatureColumn`.
cell_type: subclass of `RNNCell` or one of 'basic_rnn', 'lstm' or 'gru'.
cell_dtype: the dtype of the state and output for the given `cell_type`.
num_rnn_layers: number of RNN layers.
@ -630,6 +748,8 @@ def single_value_rnn_classifier(num_classes,
return _SingleValueRNNEstimator(cell,
target_column,
optimizer,
sequence_feature_columns,
context_feature_columns,
model_dir,
config,
gradient_clipping_norm,

View File

@ -22,6 +22,7 @@ import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import dynamic_rnn_estimator
from tensorflow.python.ops import rnn_cell
class IdentityRNNCell(tf.nn.rnn_cell.RNNCell):
@ -44,16 +45,16 @@ class IdentityRNNCell(tf.nn.rnn_cell.RNNCell):
class MockTargetColumn(object):
def __init__(self):
self._num_label_columns = None
def __init__(self, num_label_columns=None):
self._num_label_columns = num_label_columns
def get_eval_ops(self, features, activations, targets, metrics):
raise NotImplementedError(
'MockTargetColumn.get_eval_ops called unexpectedly.')
def activations_to_predictions(self, flattened_activations, proba=False):
def logits_to_predictions(self, flattened_activations, proba=False):
raise NotImplementedError(
'MockTargetColumn.activations_to_predictions called unexpectedly.')
'MockTargetColumn.logits_to_predictions called unexpectedly.')
def loss(self, activations, targets, features):
raise NotImplementedError('MockTargetColumn.loss called unexpectedly.')
@ -88,67 +89,89 @@ def sequence_length_mask(values, lengths):
class DynamicRnnEstimatorTest(tf.test.TestCase):
CELL_STATE_SIZE = 8
CELL_OUTPUT_SIZE = 6
NUM_RNN_CELL_UNITS = 8
NUM_LABEL_COLUMNS = 4
def setUp(self):
self._rnn_cell = IdentityRNNCell(self.CELL_STATE_SIZE,
self.CELL_OUTPUT_SIZE)
self._mock_target_column = MockTargetColumn()
self._rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
self._mock_target_column = MockTargetColumn(
num_label_columns=self.NUM_LABEL_COLUMNS)
location = tf.contrib.layers.sparse_column_with_keys(
'location', keys=['west_side', 'east_side', 'nyc'])
location_onehot = tf.contrib.layers.one_hot_column(location)
context_features = [location_onehot]
wire_cast = tf.contrib.layers.sparse_column_with_keys(
'wire_cast', ['marlo', 'omar', 'stringer'])
wire_cast_embedded = tf.contrib.layers.embedding_column(
wire_cast, dimension=8)
measurements = tf.contrib.layers.real_valued_column(
'measurements', dimension=2)
sequence_features = [measurements, wire_cast_embedded]
self._rnn_estimator = dynamic_rnn_estimator._MultiValueRNNEstimator(
cell=self._rnn_cell,
sequence_feature_columns=sequence_features,
context_feature_columns=context_features,
target_column=self._mock_target_column,
optimizer=tf.train.GradientDescentOptimizer(0.1))
self._columns_to_tensors = {
'location': tf.SparseTensor(
indices=[[0, 0], [1, 0], [2, 0]],
values=['west_side', 'west_side', 'nyc'],
shape=[3, 1]),
'wire_cast': tf.SparseTensor(
indices=[[0, 0, 0], [0, 1, 0],
[1, 0, 0], [1, 1, 0], [1, 1, 1],
[2, 0, 0]],
values=[b'marlo', b'stringer',
b'omar', b'stringer', b'marlo',
b'marlo'],
shape=[3, 2, 2]),
'measurements': tf.random_uniform([3, 2, 2])}
def testGetModelInput(self):
initial_state, sequence_input = self._rnn_estimator._get_model_input(
self._columns_to_tensors)
self.assertIsNone(initial_state)
with self.test_session() as sess:
sess.run(tf.initialize_all_variables())
sess.run(tf.initialize_all_tables())
sequence_input_val = sess.run(sequence_input)
expected_shape = np.array([
3, # expected batch size
2, # padded sequence length
3 + 8 + 2 # location keys + embedding dim + measurement dimension
])
self.assertAllEqual(expected_shape, sequence_input_val.shape)
def testConstructRNN(self):
"""Test `DynamicRNNEstimator._construct_rnn`."""
batch_size = 4
padded_length = 6
num_classes = 4
initial_state, sequence_input = self._rnn_estimator._get_model_input(
self._columns_to_tensors)
activations_t, final_state_t = self._rnn_estimator._construct_rnn(
initial_state, sequence_input)
# Set up mocks
self._mock_target_column.set_num_label_columns(num_classes)
np.random.seed(111)
mock_linear_layer_output = np.random.rand(
batch_size, padded_length, num_classes)
# Create features
inputs = np.random.rand(batch_size, padded_length, self.CELL_OUTPUT_SIZE)
sequence_length = np.random.randint(0, padded_length + 1, batch_size)
features = {'inputs': tf.constant(
inputs, dtype=tf.float32),
'sequence_length': tf.constant(
sequence_length, dtype=tf.int32)}
# Map feature to activations with mocked linear layer.
with tf.test.mock.patch.object(dynamic_rnn_estimator,
'layers') as mock_layers:
mock_layers.fully_connected.return_value = tf.constant(
mock_linear_layer_output, dtype=tf.float32)
activations_t, final_state_t = self._rnn_estimator._construct_rnn(
features)
_, fully_connected_kwargs = mock_layers.fully_connected.call_args
linear_layer_inputs_t = fully_connected_kwargs['inputs']
linear_layer_output_dim = fully_connected_kwargs['num_outputs']
# Obtain values of linear layer input, activations and final state.
# Obtain values of activations and final state.
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
linear_layer_inputs, activations, final_state = sess.run(
[linear_layer_inputs_t, activations_t, final_state_t])
sess.run(tf.initialize_all_tables())
activations, final_state = sess.run([activations_t, final_state_t])
np.testing.assert_equal(num_classes, linear_layer_output_dim)
np.testing.assert_almost_equal(inputs, linear_layer_inputs)
np.testing.assert_almost_equal(mock_linear_layer_output, activations)
np.testing.assert_almost_equal(
np.zeros([batch_size, self._rnn_cell.state_size], dtype=float),
final_state)
expected_activations_shape = np.array([3, 2, self.NUM_LABEL_COLUMNS])
self.assertAllEqual(expected_activations_shape, activations.shape)
expected_state_shape = np.array([3, self.NUM_RNN_CELL_UNITS])
self.assertAllEqual(expected_state_shape, final_state.shape)
class MultiValueRNNEstimatorTest(tf.test.TestCase):
"""Tests for `_MultiValueRNNEstimator` class."""
CELL_STATE_SIZE = 8
CELL_OUTPUT_SIZE = 6
INPUTS_COLUMN = tf.contrib.layers.real_valued_column(
'inputs', dimension=CELL_OUTPUT_SIZE)
def setUp(self):
self._rnn_cell = IdentityRNNCell(self.CELL_STATE_SIZE,
@ -156,6 +179,7 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase):
self._mock_target_column = MockTargetColumn()
self._seq_estimator = dynamic_rnn_estimator._MultiValueRNNEstimator(
cell=self._rnn_cell,
sequence_feature_columns=[self.INPUTS_COLUMN],
target_column=self._mock_target_column,
optimizer=tf.train.GradientDescentOptimizer(0.1))
@ -251,13 +275,13 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase):
with tf.test.mock.patch.object(
self._mock_target_column,
'activations_to_predictions',
'logits_to_predictions',
return_value=flattened_argmax,
autospec=True) as mock_activations_to_predictions:
autospec=True) as mock_logits_to_predictions:
predictions_t = self._seq_estimator._activations_to_predictions(
None, tf.constant(activations, dtype=tf.float32))
(target_column_input_activations_t,
), _ = mock_activations_to_predictions.call_args
), _ = mock_logits_to_predictions.call_args
with tf.Session() as sess:
target_column_input_activations, predictions = sess.run(
@ -294,9 +318,14 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase):
return input_fn
seq_columns = [tf.contrib.layers.real_valued_column(
'inputs', dimension=cell_size)]
config = tf.contrib.learn.RunConfig(tf_random_seed=1234)
sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_regressor(
num_units=cell_size, learning_rate=learning_rate, config=config)
num_units=cell_size,
sequence_feature_columns=seq_columns,
learning_rate=learning_rate,
config=config)
train_input_fn = get_sin_input_fn(
batch_size, sequence_length, np.pi / 32, seed=1234)
@ -336,10 +365,13 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase):
return {'inputs': inputs}, labels
return input_fn
seq_columns = [tf.contrib.layers.real_valued_column(
'inputs', dimension=cell_size)]
config = tf.contrib.learn.RunConfig(tf_random_seed=21212)
sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_classifier(
num_classes=2,
num_units=cell_size,
sequence_feature_columns=seq_columns,
learning_rate=learning_rate,
config=config)
@ -421,9 +453,12 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase):
return {'inputs': inputs}, labels
return input_fn
seq_columns = [tf.contrib.layers.real_valued_column(
'inputs', dimension=cell_size)]
config = tf.contrib.learn.RunConfig(tf_random_seed=6)
sequence_regressor = dynamic_rnn_estimator.single_value_rnn_regressor(
num_units=cell_size,
sequence_feature_columns=seq_columns,
cell_type=cell_type,
optimizer_type=optimizer_type,
learning_rate=learning_rate,
@ -467,10 +502,13 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase):
return {'inputs': inputs}, labels
return input_fn
seq_columns = [tf.contrib.layers.real_valued_column(
'inputs', dimension=cell_size)]
config = tf.contrib.learn.RunConfig(tf_random_seed=77)
sequence_classifier = dynamic_rnn_estimator.single_value_rnn_classifier(
num_classes=2,
num_units=cell_size,
sequence_feature_columns=seq_columns,
cell_type=cell_type,
optimizer_type=optimizer_type,
learning_rate=learning_rate,

View File

@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
import abc
import collections
import copy
import inspect
import itertools
@ -52,6 +53,8 @@ from tensorflow.contrib.learn.python.learn.utils import export
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import device_setter
@ -81,6 +84,12 @@ class ModeKeys(object):
INFER = 'infer'
class ModelFnOps(
collections.namedtuple('ModelFnOps', ['predictions', 'loss', 'training_op',
'default_metrics', 'signature_fn'])):
pass
def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1):
"""Make inputs into input and feed functions."""
if input_fn is None:
@ -230,6 +239,9 @@ def _make_metrics_ops(metrics, features, targets, predictions):
if isinstance(name, tuple):
# Multi-head metrics.
if len(name) != 2:
raise ValueError('Invalid metric key {}: expected a tuple of '
                 'length 2, got length {}.'.format(name, len(name)))
if not isinstance(predictions, dict):
raise ValueError(
'Metrics passed provide (name, prediction), '
@ -371,7 +383,7 @@ class BaseEstimator(
provided.
"""
logging.warning('The current implementation of partial_fit is not optimized'
'for use in a loop. Consider using fit() instead.')
' for use in a loop. Consider using fit() instead.')
return self.fit(x=x, y=y, input_fn=input_fn, steps=steps,
batch_size=batch_size, monitors=monitors)
@ -405,7 +417,7 @@ class BaseEstimator(
AS_ITERABLE_DATE, AS_ITERABLE_INSTRUCTIONS, as_iterable=False)
def predict(
self, x=None, input_fn=None, batch_size=None, outputs=None,
as_iterable=False):
as_iterable=True):
"""Returns predictions for given features.
Args:
@ -602,26 +614,26 @@ class BaseEstimator(
def _check_inputs(self, features, targets):
if self._features_info is not None:
logging.warning('Given features: %s, required signatures: %s.',
str(features), str(self._features_info))
logging.debug('Given features: %s, required signatures: %s.',
str(features), str(self._features_info))
if not tensor_signature.tensors_compatible(features, self._features_info):
raise ValueError('Features are incompatible with given information. '
'Given features: %s, required signatures: %s.' %
(str(features), str(self._features_info)))
else:
self._features_info = tensor_signature.create_signatures(features)
logging.info('Setting feature info to %s', str(self._features_info))
logging.debug('Setting feature info to %s.', str(self._features_info))
if targets is not None:
if self._targets_info is not None:
logging.warning('Given targets: %s, required signatures: %s.',
str(targets), str(self._targets_info))
logging.debug('Given targets: %s, required signatures: %s.',
str(targets), str(self._targets_info))
if not tensor_signature.tensors_compatible(targets, self._targets_info):
raise ValueError('Targets are incompatible with given information. '
'Given targets: %s, required signatures: %s.' %
(str(targets), str(self._targets_info)))
else:
self._targets_info = tensor_signature.create_signatures(targets)
logging.info('Setting targets info to %s', str(self._targets_info))
logging.debug('Setting targets info to %s', str(self._targets_info))
def _train_model(self,
input_fn,
@ -781,7 +793,7 @@ class BaseEstimator(
return result
def _infer_model(
self, input_fn, feed_fn=None, outputs=None, as_iterable=False):
self, input_fn, feed_fn=None, outputs=None, as_iterable=True):
# Check that model has been trained.
checkpoint_path = saver.latest_checkpoint(self._model_dir)
if not checkpoint_path:
@ -883,8 +895,15 @@ class Estimator(BaseEstimator):
Args:
model_fn: Model function, takes features and targets tensors or dicts of
tensors and returns predictions and loss tensors.
Supports next three signatures for the function:
tensors and returns a tuple of:
* predictions: `Tensor`, `SparseTensor` or dictionary of same.
Can also be any type that is convertible to a `Tensor` or
`SparseTensor`, or dictionary of same.
* loss: Scalar loss `Tensor`.
* train_op: Training update `Tensor` or `Operation`.
Supports the following three signatures for the function:
* `(features, targets) -> (predictions, loss, train_op)`
* `(features, targets, mode) -> (predictions, loss, train_op)`
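A minimal `model_fn` matching the first signature might look like the sketch below; the toy linear model is illustrative only and assumes `features` is a single `Tensor` of shape `[batch_size, 1]`:

def example_model_fn(features, targets):
  # One learned weight and bias; purely a placeholder model.
  w = tf.Variable(0.0, name='w')
  b = tf.Variable(0.0, name='b')
  predictions = w * features + b
  loss = tf.reduce_mean(tf.square(predictions - targets))
  train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
  return predictions, loss, train_op

est = tf.contrib.learn.Estimator(model_fn=example_model_fn)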
@ -929,7 +948,7 @@ class Estimator(BaseEstimator):
'arguments, but not None params (%s) are passed.' %
(model_fn, params))
if params is None and 'params' in model_fn_args:
logging.warning('Estimator\'s model_fn (%s) has includes params '
logging.warning('Estimator\'s model_fn (%s) includes params '
'argument, but params are not passed to Estimator.',
model_fn)
self._model_fn = model_fn
@ -943,10 +962,48 @@ class Estimator(BaseEstimator):
model_fn_args = _get_arguments(self._model_fn)
if 'mode' in model_fn_args:
if 'params' in model_fn_args:
return self._model_fn(features, targets, mode=mode, params=self.params)
predictions, loss, train_op = self._model_fn(
features, targets, mode=mode, params=self.params)
else:
return self._model_fn(features, targets, mode=mode)
return self._model_fn(features, targets)
predictions, loss, train_op = self._model_fn(
features, targets, mode=mode)
else:
predictions, loss, train_op = self._model_fn(features, targets)
# Validate train_op.
if train_op is None:
if mode == ModeKeys.TRAIN:
raise ValueError('Missing train_op.')
elif not isinstance(train_op, ops.Operation):
train_op = ops.convert_to_tensor(train_op).op
# Validate loss.
if loss is None:
if mode in (ModeKeys.TRAIN, ModeKeys.EVAL):
raise ValueError('Missing loss.')
else:
loss = ops.convert_to_tensor(loss)
loss_shape = loss.get_shape()
if loss_shape.num_elements() not in (None, 1):
raise ValueError('Loss must be scalar: %s.' % loss)
if not loss_shape.is_compatible_with(tensor_shape.scalar()):
loss = array_ops.reshape(loss, [])
# Validate predictions.
if predictions is None:
if mode == ModeKeys.INFER:
raise ValueError('Missing predictions.')
else:
if isinstance(predictions, dict):
predictions = {
k: contrib_framework.convert_to_tensor_or_sparse_tensor(v)
for k, v in six.iteritems(predictions)
}
else:
predictions = contrib_framework.convert_to_tensor_or_sparse_tensor(
predictions)
return predictions, loss, train_op
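    # For illustration, the loss validation above treats these cases as
    # follows (made-up values):
    #   loss = tf.constant(3.5)       -> already a scalar; kept as-is.
    #   loss = tf.constant([3.5])     -> one element; reshaped to shape [].
    #   loss = tf.constant([1., 2.])  -> raises ValueError('Loss must be scalar: ...').
    # A loss whose static shape is unknown also passes the element-count
    # check and is reshaped to a scalar.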
def _get_train_ops(self, features, targets):
"""Method that builds model graph and returns trainer ops.

View File

@ -37,9 +37,9 @@ _IRIS_INPUT_DIM = 4
def boston_input_fn(num_epochs=None):
boston = tf.contrib.learn.datasets.load_boston()
features = tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM])
if num_epochs:
features = tf.train.limit_epochs(features, num_epochs=num_epochs)
features = tf.train.limit_epochs(
tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM]),
num_epochs=num_epochs)
target = tf.reshape(tf.constant(boston.target), [-1, 1])
return features, target
@ -61,7 +61,10 @@ def boston_eval_fn():
def linear_model_params_fn(features, target, mode, params):
assert mode in ('train', 'eval', 'infer')
assert mode in (
tf.contrib.learn.ModeKeys.TRAIN,
tf.contrib.learn.ModeKeys.EVAL,
tf.contrib.learn.ModeKeys.INFER)
prediction, loss = (
tf.contrib.learn.models.linear_regression_zero_init(features, target)
)
@ -72,7 +75,10 @@ def linear_model_params_fn(features, target, mode, params):
def linear_model_fn(features, target, mode):
assert mode in ('train', 'eval', 'infer')
assert mode in (
tf.contrib.learn.ModeKeys.TRAIN,
tf.contrib.learn.ModeKeys.EVAL,
tf.contrib.learn.ModeKeys.INFER)
prediction, loss = (
tf.contrib.learn.models.linear_regression_zero_init(features, target)
)
@ -120,6 +126,46 @@ class CheckCallsMonitor(tf.contrib.learn.monitors.BaseMonitor):
class EstimatorTest(tf.test.TestCase):
def testInvalidModelFn_no_train_op(self):
def _invalid_model_fn(features, target):
# pylint: disable=unused-argument
tf.Variable(42.0, name='weight')
return None, None, None
est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn)
with self.assertRaisesRegexp(ValueError, 'Missing train_op'):
est.fit(input_fn=boston_input_fn, steps=1)
def testInvalidModelFn_no_loss(self):
def _invalid_model_fn(features, target, mode):
# pylint: disable=unused-argument
w = tf.Variable(42.0, name='weight')
loss = 100.0 - w
train_op = w.assign_add(loss / 100.0)
if mode == tf.contrib.learn.ModeKeys.EVAL:
loss = None
return None, loss, train_op
est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn)
est.fit(input_fn=boston_input_fn, steps=1)
with self.assertRaisesRegexp(ValueError, 'Missing loss'):
est.evaluate(input_fn=boston_eval_fn, steps=1)
def testInvalidModelFn_no_prediction(self):
def _invalid_model_fn(features, target):
# pylint: disable=unused-argument
w = tf.Variable(42.0, name='weight')
loss = 100.0 - w
train_op = w.assign_add(loss / 100.0)
return None, loss, train_op
est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn)
est.fit(input_fn=boston_input_fn, steps=1)
est.evaluate(input_fn=boston_eval_fn, steps=1)
with self.assertRaisesRegexp(ValueError, 'Missing prediction'):
est.predict(input_fn=boston_input_fn)
with self.assertRaisesRegexp(ValueError, 'Missing prediction'):
est.predict(
input_fn=functools.partial(boston_input_fn, num_epochs=1),
as_iterable=True)
def testCustomConfig(self):
test_random_seed = 5783452
@ -211,7 +257,7 @@ class EstimatorTest(tf.test.TestCase):
metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error})
self.assertAllClose(scores2['MSE'],
scores['MSE'])
predictions = est2.predict(x=boston.data)
predictions = np.array(list(est2.predict(x=boston.data)))
other_score = _sklearn.mean_squared_error(predictions, float64_target)
self.assertAllClose(other_score, scores['MSE'])
@ -238,7 +284,7 @@ class EstimatorTest(tf.test.TestCase):
x=boston.data,
y=float64_target,
metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error})
predictions = est.predict(x=boston.data)
predictions = np.array(list(est.predict(x=boston.data)))
other_score = _sklearn.mean_squared_error(predictions, boston.target)
self.assertAllClose(other_score, scores['MSE'])
self.assertTrue('global_step' in scores)
@ -252,13 +298,17 @@ class EstimatorTest(tf.test.TestCase):
x=iris.data,
y=iris.target,
metrics={('accuracy', 'class'): tf.contrib.metrics.streaming_accuracy})
predictions = est.predict(x=iris.data)
predictions_class = est.predict(x=iris.data, outputs=['class'])
self.assertEqual(predictions['class'].shape[0], iris.target.shape[0])
self.assertAllClose(predictions['class'], predictions_class['class'])
self.assertAllClose(predictions['class'], np.argmax(predictions['prob'],
axis=1))
other_score = _sklearn.accuracy_score(iris.target, predictions['class'])
predictions = list(est.predict(x=iris.data))
predictions_class = list(est.predict(x=iris.data, outputs=['class']))
self.assertEqual(len(predictions), iris.target.shape[0])
classes_batch = np.array([p['class'] for p in predictions])
self.assertAllClose(
classes_batch,
np.array([p['class'] for p in predictions_class]))
self.assertAllClose(
classes_batch,
np.argmax(np.array([p['prob'] for p in predictions]), axis=1))
other_score = _sklearn.accuracy_score(iris.target, classes_batch)
self.assertAllClose(other_score, scores['accuracy'])
self.assertTrue('global_step' in scores)
self.assertEqual(scores['global_step'], 100)
@ -268,8 +318,8 @@ class EstimatorTest(tf.test.TestCase):
est = tf.contrib.learn.Estimator(model_fn=logistic_model_no_mode_fn)
est.fit(input_fn=iris_input_fn, steps=100)
_ = est.evaluate(input_fn=iris_input_fn, steps=1)
predictions = est.predict(x=iris.data)['class']
self.assertEqual(predictions.shape[0], iris.target.shape[0])
predictions = list(est.predict(x=iris.data))
self.assertEqual(len(predictions), iris.target.shape[0])
def testIrisIterator(self):
iris = tf.contrib.learn.datasets.load_iris()
@ -278,8 +328,8 @@ class EstimatorTest(tf.test.TestCase):
y_iter = itertools.islice(iris.target, 100)
est.fit(x_iter, y_iter, steps=100)
_ = est.evaluate(input_fn=iris_input_fn, steps=1)
predictions = est.predict(x=iris.data)['class']
self.assertEqual(predictions.shape[0], iris.target.shape[0])
predictions = list(est.predict(x=iris.data))
self.assertEqual(len(predictions), iris.target.shape[0])
def testTrainInputFn(self):
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
@ -304,32 +354,16 @@ class EstimatorTest(tf.test.TestCase):
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
boston = tf.contrib.learn.datasets.load_boston()
est.fit(input_fn=boston_input_fn, steps=1)
output = est.predict(boston.data)
self.assertEqual(output.shape[0], boston.target.shape[0])
output = list(est.predict(x=boston.data, batch_size=10))
self.assertEqual(len(output), boston.target.shape[0])
def testPredictInputFn(self):
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
boston = tf.contrib.learn.datasets.load_boston()
est.fit(input_fn=boston_input_fn, steps=1)
output = est.predict(input_fn=boston_input_fn)
self.assertEqual(output.shape[0], boston.target.shape[0])
def testPredictAsIterable(self):
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
boston = tf.contrib.learn.datasets.load_boston()
est.fit(input_fn=boston_input_fn, steps=1)
self.assertEqual(
len(list(est.predict(boston.data, batch_size=10, as_iterable=True))),
boston.target.shape[0])
def testPredictInputFnAsIterable(self):
est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
boston = tf.contrib.learn.datasets.load_boston()
est.fit(input_fn=boston_input_fn, steps=1)
input_fn = functools.partial(boston_input_fn, num_epochs=1)
self.assertEqual(
len(list(est.predict(input_fn=input_fn, as_iterable=True))),
boston.target.shape[0])
output = list(est.predict(input_fn=input_fn))
self.assertEqual(len(output), boston.target.shape[0])
def testWrongInput(self):
def other_input_fn():

View File

@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
import random
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python import learn
@ -28,36 +29,6 @@ from tensorflow.contrib.learn.python.learn.estimators._sklearn import accuracy_s
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split
# TODO(b/29580537): Remove when we deprecate feature column inference.
class InferredfeatureColumnTest(tf.test.TestCase):
"""Custom optimizer tests."""
def testIrisMomentum(self):
random.seed(42)
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(iris.data,
iris.target,
test_size=0.2,
random_state=42)
def custom_optimizer():
return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
cont_features = [
tf.contrib.layers.real_valued_column("", dimension=4)]
classifier = learn.DNNClassifier(
feature_columns=cont_features,
hidden_units=[10, 20, 10],
n_classes=3,
optimizer=custom_optimizer,
config=learn.RunConfig(tf_random_seed=1))
classifier.fit(x_train, y_train, steps=400)
score = accuracy_score(y_test, classifier.predict(x_test))
self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
class FeatureEngineeringFunctionTest(tf.test.TestCase):
"""Tests feature_engineering_fn."""
@ -145,7 +116,8 @@ class CustomOptimizer(tf.test.TestCase):
optimizer=custom_optimizer,
config=learn.RunConfig(tf_random_seed=1))
classifier.fit(x_train, y_train, steps=400)
score = accuracy_score(y_test, classifier.predict(x_test))
predictions = np.array(list(classifier.predict(x_test)))
score = accuracy_score(y_test, predictions)
self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))

View File

@ -0,0 +1,850 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstractions for the head(s) of a model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import abc
from tensorflow.contrib import losses
from tensorflow.contrib import metrics as metrics_lib
from tensorflow.contrib.learn.python.learn import metric_spec
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.session_bundle import exporter
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import logging_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import variables
from tensorflow.python.training import training
# TODO(zakaria): add functions that creates a head and returns ModelOpFn
def _regression_head(label_name=None,
weight_column_name=None,
target_dimension=1,
enable_centered_bias=False, head_name=None):
"""Creates a _Head for linear regression.
Args:
label_name: String, name of the key in label dict. Can be None if label
is a tensor (single-headed models).
weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example.
target_dimension: Integer, number of label columns; greater than 1 for
multi-dimensional regression.
enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
head_name: name of the head. If provided, predictions, summary and metrics
keys will be prefixed by the head_name and an underscore.
Returns:
An instance of _Head
"""
return _RegressionHead(train_loss_fn=_mean_squared_loss,
eval_loss_fn=_mean_squared_loss,
label_name=label_name,
weight_column_name=weight_column_name,
target_dimension=target_dimension,
enable_centered_bias=enable_centered_bias,
head_name=head_name)
# TODO(zakaria): Add logistic_regression_head
def _multi_class_head(n_classes, label_name=None, weight_column_name=None,
enable_centered_bias=False, head_name=None,
thresholds=None):
"""Creates a _Head for multi class single label classification.
The Head uses softmax cross entropy loss.
Args:
n_classes: Integer, number of classes, must be >= 2
label_name: String, name of the key in label dict. Can be None if label
is a tensor (single-headed models).
weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example.
enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
head_name: name of the head. If provided, predictions, summary and metrics
keys will be prefixed by the head_name and an underscore.
thresholds: thresholds for eval metrics, defaults to [.5]
Returns:
An instance of _MultiClassHead.
Raises:
ValueError: if n_classes is < 2
"""
if n_classes < 2:
raise ValueError("n_classes must be > 1 for classification.")
if n_classes == 2:
loss_fn = _log_loss_with_two_classes
else:
loss_fn = _softmax_cross_entropy_loss
return _MultiClassHead(train_loss_fn=loss_fn,
eval_loss_fn=loss_fn,
n_classes=n_classes,
label_name=label_name,
weight_column_name=weight_column_name,
enable_centered_bias=enable_centered_bias,
head_name=head_name,
thresholds=thresholds)
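# A hypothetical usage sketch (these factories are module-private, so a
# call like this would live inside this package):
#
#   head = _multi_class_head(n_classes=3, label_name='label')
#
# Per the branch above, n_classes == 2 selects _log_loss_with_two_classes
# and n_classes > 2 selects _softmax_cross_entropy_loss.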
def _binary_svm_head(label_name=None, weight_column_name=None,
enable_centered_bias=False, head_name=None,
thresholds=None):
"""Creates a _TargetColumn for binary classification with SVMs.
The target column uses binary hinge loss.
Args:
label_name: String, name of the key in label dict. Can be None if label
is a tensor (single-headed models).
weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example.
enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
head_name: name of the head. If provided, predictions, summary and metrics
keys will be prefixed by the head_name and an underscore.
thresholds: thresholds for eval metrics, defaults to [.5]
Returns:
An instance of _BinarySvmHead.
"""
return _BinarySvmHead(label_name=label_name,
weight_column_name=weight_column_name,
enable_centered_bias=enable_centered_bias,
head_name=head_name,
thresholds=thresholds)
def _multi_label_head(n_classes, label_name=None, weight_column_name=None,
enable_centered_bias=False, head_name=None,
thresholds=None):
"""Creates a _Head for multi label classification.
The Head uses softmax cross entropy loss.
Args:
n_classes: Integer, number of classes, must be >= 2
label_name: String, name of the key in label dict. Can be None if label
is a tensor (single-headed models).
weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example.
enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
head_name: name of the head. If provided, predictions, summary and metrics
keys will be prefixed by the head_name and an underscore.
thresholds: thresholds for eval metrics, defaults to [.5]
Returns:
An instance of _MultiLabelHead.
Raises:
ValueError: if n_classes is < 2
"""
if n_classes < 2:
raise ValueError("n_classes must be > 1 for classification.")
return _MultiLabelHead(n_classes=n_classes,
label_name=label_name,
weight_column_name=weight_column_name,
enable_centered_bias=enable_centered_bias,
head_name=head_name,
thresholds=thresholds)
# TODO(zakaria): Make the classes public once we are ready for users to subclass
# them.
class _Head(object):
"""Interface for the head/top of a model.
Given logits or output of a hidden layer, a Head knows how to compute
predictions, loss, default metric and export signature.
"""
__metaclass__ = abc.ABCMeta
@abc.abstractproperty
def logits_dimension(self):
raise NotImplementedError("Calling an abstract method.")
def head_ops(self, features, target, mode, train_op_fn, logits=None,
logits_input=None):
"""Returns ops for a model_fn.
Args:
features: input dict.
target: target dict or tensor.
mode: estimator's ModeKeys
train_op_fn: function that takes a scalar loss and returns an op to
optimize with the loss.
logits: logits to be used for the head.
logits_input: tensor to build logits from.
Returns:
`estimator.ModelFnOps`
Raises:
ValueError: if mode is not recognized.
"""
_check_logits_input_not_supported(logits, logits_input)
if mode == estimator.ModeKeys.TRAIN:
loss, additional_train_op = self._training_loss(features, target,
logits, logits_input)
train_op = train_op_fn(loss) if train_op_fn else None
if additional_train_op:
if train_op:
train_op = control_flow_ops.group(train_op, *additional_train_op)
else:
train_op = control_flow_ops.group(*additional_train_op)
return estimator.ModelFnOps(None, loss, train_op,
self._default_metric(),
self._create_signature_fn())
if mode == estimator.ModeKeys.INFER:
predictions = self._infer_op(logits, logits_input)
return estimator.ModelFnOps(predictions, None, None,
self._default_metric(),
self._create_signature_fn())
if mode == estimator.ModeKeys.EVAL:
predictions, loss = self._eval_op(features, target, logits, logits_input)
return estimator.ModelFnOps(predictions, loss, None,
self._default_metric(),
self._create_signature_fn())
raise ValueError("mode=%s unrecognized" % str(mode))
@abc.abstractmethod
def _training_loss(self, features, target, logits=None, logits_input=None,
name="training_loss"):
raise NotImplementedError("Calling an abstract method.")
@abc.abstractmethod
def _infer_op(self, logits=None, logits_input=None, name="infer_op"):
raise NotImplementedError("Calling an abstract method.")
@abc.abstractmethod
def _eval_op(self, features, target, logits=None, logits_input=None,
name="eval_op"):
raise NotImplementedError("Calling an abstract method.")
@abc.abstractmethod
def _default_metric(self):
raise NotImplementedError("Calling an abstract method.")
@abc.abstractmethod
def _create_signature_fn(self):
"""Creates signature function for the Head.
"""
raise NotImplementedError("Calling an abstract method.")
class _RegressionHead(_Head):
"""_Head for regression."""
def __init__(self, train_loss_fn, eval_loss_fn, label_name,
weight_column_name, target_dimension, enable_centered_bias,
head_name):
"""Base type for all single heads.
Args:
train_loss_fn: loss_fn for training.
eval_loss_fn: loss_fn for eval.
label_name: String, name of the key in label dict. Can be None if label
is a tensor (single-headed models).
weight_column_name: A string defining feature column name representing
weights. It is used to down weight or boost examples during training. It
will be multiplied by the loss of the example.
target_dimension: Integer, number of label columns.
enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
head_name: name of the head. If provided, predictions, summary and metrics
keys will be prefixed by the head_name and an underscore.
"""
self._train_loss_fn = train_loss_fn
self._eval_loss_fn = eval_loss_fn
self._logits_dimension = target_dimension
self._label_name = label_name
self._weight_column_name = weight_column_name
self._head_name = head_name
self._enable_centered_bias = enable_centered_bias
self._centered_bias_weight_collection = _head_prefixed(head_name,
"centered_bias")
@property
def logits_dimension(self):
return self._logits_dimension
def _training_loss(self, features, target, logits=None,
logits_input=None, name="training_loss"):
"""Returns training loss tensor for this head.
Training loss is different from the loss reported on the tensorboard as we
should respect the example weights when computing the gradient.
L = sum_{i} w_{i} * l_{i} / B
where B is the number of examples in the batch, l_{i}, w_{i} are individual
losses, and example weight.
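
    For example, with B = 2, per-example losses l = (4, 2) and weights
    w = (1, 3), the training loss is (1*4 + 3*2) / 2 = 5, while the reported
    weighted average loss is (1*4 + 3*2) / (1 + 3) = 2.5.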

    Args:
      features: features dict.
      target: either a label tensor or, in the multi-head case, a dict of
        string to label tensor.
      logits: logits, a float tensor.
      logits_input: Output of the last hidden layer.
      name: Op name.

    Returns:
      A tuple of the training loss and additional_train_op (possibly None).
"""
target = _check_target(target, self._label_name)
centered_bias_step = None
if self._enable_centered_bias:
logits = nn.bias_add(logits, _centered_bias(
self.logits_dimension,
self._centered_bias_weight_collection))
centered_bias_step = [_centered_bias_step(
self.logits_dimension,
self._centered_bias_weight_collection,
target,
self._train_loss_fn)]
loss_unweighted = self._train_loss_fn(logits, target)
loss, weighted_average_loss = _loss(
loss_unweighted,
_weight_tensor(features, self._weight_column_name),
name=name)
logging_ops.scalar_summary(_head_prefixed(self._head_name, "loss"),
weighted_average_loss)
return loss, centered_bias_step
def _eval_op(self, features, target, logits=None, logits_input=None,
name="eval_op"):
target = _check_target(target, self._label_name)
if self._enable_centered_bias:
logits = nn.bias_add(logits, _centered_bias(
self.logits_dimension,
self._centered_bias_weight_collection))
loss_unweighted = self._eval_loss_fn(logits, target)
loss, _ = _loss(loss_unweighted,
_weight_tensor(features, self._weight_column_name),
name=name)
predictions = self._logits_to_prediction(logits)
return predictions, loss
def _infer_op(self, logits=None, logits_input=None):
if self._enable_centered_bias:
logits = nn.bias_add(logits, _centered_bias(
self.logits_dimension,
self._centered_bias_weight_collection))
return self._logits_to_prediction(logits)
  def _logits_to_prediction(self, logits=None):
    predictions = {}
    if self.logits_dimension == 1:
      predictions[PredictionKey.SCORES] = array_ops.squeeze(
          logits, squeeze_dims=[1])
    else:
      predictions[PredictionKey.SCORES] = logits
    return predictions
# pylint: disable=undefined-variable
def _create_signature_fn(self):
def _regression_signature_fn(examples, unused_features, predictions):
if isinstance(predictions, dict):
        score = predictions[PredictionKey.SCORES]
else:
score = predictions
default_signature = exporter.regression_signature(
input_tensor=examples, output_tensor=score)
# TODO(zakaria): add validation
return default_signature, {}
return _regression_signature_fn
  def _default_metric(self):
    return {_head_prefixed(self._head_name, MetricKey.LOSS):
                _weighted_average_loss_metric_spec(self._eval_loss_fn,
                                                   PredictionKey.SCORES,
                                                   self._label_name,
                                                   self._weight_column_name)}
class _MultiClassHead(_Head):
"""_Head for classification."""
def __init__(self, train_loss_fn, eval_loss_fn, n_classes, label_name,
weight_column_name, enable_centered_bias, head_name,
thresholds=None):
"""Base type for all single heads.
Args:
train_loss_fn: loss_fn for training.
eval_loss_fn: loss_fn for eval.
n_classes: number of classes.
      label_name: String, name of the key in the label dict. Can be None if the
        label is a tensor (single-headed models).
      weight_column_name: A string defining the feature column name representing
        weights. It is used to down-weight or boost examples during training.
        It will be multiplied by the loss of the example.
      enable_centered_bias: A bool. If True, the estimator will learn a centered
        bias variable for each class. The rest of the model structure learns
        the residual after the centered bias.
head_name: name of the head. If provided, predictions, summary and metrics
keys will be prefixed by the head_name and an underscore.
      thresholds: thresholds for eval metrics. Defaults to [.5].
Raises:
ValueError: if n_classes is invalid.
"""
if n_classes < 2:
raise ValueError("n_classes must be >= 2")
self._thresholds = thresholds if thresholds else [.5]
self._train_loss_fn = train_loss_fn
self._eval_loss_fn = eval_loss_fn
self._logits_dimension = 1 if n_classes == 2 else n_classes
self._label_name = label_name
self._weight_column_name = weight_column_name
self._head_name = head_name
self._enable_centered_bias = enable_centered_bias
self._centered_bias_weight_collection = _head_prefixed(head_name,
"centered_bias")
@property
def logits_dimension(self):
return self._logits_dimension
def _training_loss(self, features, target, logits=None,
logits_input=None, name="training_loss"):
"""Returns training loss tensor for this head.
Training loss is different from the loss reported on the tensorboard as we
should respect the example weights when computing the gradient.
L = sum_{i} w_{i} * l_{i} / B
where B is the number of examples in the batch, l_{i}, w_{i} are individual
losses, and example weight.

    Args:
      features: features dict.
      target: either a label tensor or, in the multi-head case, a dict of
        string to label tensor.
      logits: logits, a float tensor.
      logits_input: Output of the last hidden layer.
      name: Op name.

    Returns:
      A tuple of the training loss and additional_train_op (possibly None).
"""
target = _check_target(target, self._label_name)
centered_bias_step = None
if self._enable_centered_bias:
logits = nn.bias_add(logits, _centered_bias(
self.logits_dimension,
self._centered_bias_weight_collection))
centered_bias_step = [_centered_bias_step(
self.logits_dimension,
self._centered_bias_weight_collection,
target,
self._train_loss_fn)]
loss_unweighted = self._train_loss_fn(logits, target)
loss, weighted_average_loss = _loss(
loss_unweighted,
_weight_tensor(features, self._weight_column_name),
name=name)
logging_ops.scalar_summary(_head_prefixed(self._head_name, "loss"),
weighted_average_loss)
return loss, centered_bias_step
def _eval_op(self, features, target, logits=None, logits_input=None,
name="eval_op"):
target = _check_target(target, self._label_name)
if self._enable_centered_bias:
logits = nn.bias_add(logits, _centered_bias(
self.logits_dimension,
self._centered_bias_weight_collection))
loss_unweighted = self._eval_loss_fn(logits, target)
loss, _ = _loss(loss_unweighted,
_weight_tensor(features, self._weight_column_name),
name=name)
predictions = self._logits_to_prediction(logits)
return predictions, loss
def _infer_op(self, logits=None, logits_input=None):
if self._enable_centered_bias:
logits = nn.bias_add(logits, _centered_bias(
self.logits_dimension,
self._centered_bias_weight_collection))
return self._logits_to_prediction(logits)
  def _logits_to_prediction(self, logits=None):
    predictions = {PredictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
      predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
      # Convert the single logit into two-class logits [0, logit] so the
      # softmax and argmax below apply uniformly to the binary case.
      logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PredictionKey.PROBABILITIES] = nn.softmax(logits)
    # Workaround for argmax dropping the second dimension.
    predictions[PredictionKey.CLASSES] = array_ops.expand_dims(
        math_ops.argmax(logits, 1), 1)
    return predictions
def _create_signature_fn(self):
"""See superclass."""
def _classification_signature_fn(examples, unused_features, predictions):
"""Servo signature function."""
if isinstance(predictions, dict):
default_signature = exporter.classification_signature(
input_tensor=examples,
            classes_tensor=predictions[PredictionKey.CLASSES],
            scores_tensor=predictions[PredictionKey.PROBABILITIES])
else:
default_signature = exporter.classification_signature(
input_tensor=examples,
scores_tensor=predictions)
# TODO(zakaria): add validation
return default_signature, {}
return _classification_signature_fn
def _default_metric(self):
    metrics = {_head_prefixed(self._head_name, MetricKey.LOSS):
                   _weighted_average_loss_metric_spec(self._eval_loss_fn,
                                                      PredictionKey.LOGITS,
                                                      self._label_name,
                                                      self._weight_column_name)}
# TODO(b/29366811): This currently results in both an "accuracy" and an
# "accuracy/threshold_0.500000_mean" metric for binary classification.
    metrics[_head_prefixed(self._head_name, MetricKey.ACCURACY)] = (
        metric_spec.MetricSpec(metrics_lib.streaming_accuracy,
                               PredictionKey.CLASSES, self._label_name,
                               self._weight_column_name))
if self.logits_dimension == 1:
      def _add_binary_metric(metric_key, metric_fn):
        metrics[_head_prefixed(self._head_name, metric_key)] = (
            metric_spec.MetricSpec(metric_fn,
                                   PredictionKey.LOGISTIC,
                                   self._label_name))
_add_binary_metric(MetricKey.PREDICTION_MEAN, _predictions_streaming_mean)
_add_binary_metric(MetricKey.TARGET_MEAN, _target_streaming_mean)
# Also include the streaming mean of the label as an accuracy baseline, as
# a reminder to users.
_add_binary_metric(MetricKey.ACCURACY_BASELINE, _target_streaming_mean)
_add_binary_metric(MetricKey.AUC, _streaming_auc)
for threshold in self._thresholds:
_add_binary_metric(MetricKey.ACCURACY_MEAN % threshold,
_accuracy_at_threshold(threshold))
# Precision for positive examples.
        _add_binary_metric(MetricKey.PRECISION_MEAN % threshold,
                           _streaming_at_threshold(
                               metrics_lib.streaming_precision_at_thresholds,
                               threshold))
# Recall for positive examples.
_add_binary_metric(MetricKey.RECALL_MEAN % threshold,
_streaming_at_threshold(
metrics_lib.streaming_recall_at_thresholds,
threshold))
return metrics
def _check_target(target, label_name):
target = target[label_name] if isinstance(target, dict) else target
if isinstance(target, ops.SparseTensor):
raise ValueError("SparseTensor is not supported as a target/label.")
return target
class _BinarySvmHead(_MultiClassHead):
"""_Head for binary classification using SVMs."""
def __init__(self, label_name, weight_column_name, enable_centered_bias,
head_name, thresholds):
def loss_fn(logits, target):
check_shape_op = control_flow_ops.Assert(
math_ops.less_equal(array_ops.rank(target), 2),
["target's shape should be either [batch_size, 1] or [batch_size]"])
with ops.control_dependencies([check_shape_op]):
target = array_ops.reshape(
target, shape=[array_ops.shape(target)[0], 1])
return losses.hinge_loss(logits, target)
super(_BinarySvmHead, self).__init__(
train_loss_fn=loss_fn,
eval_loss_fn=loss_fn,
n_classes=2,
label_name=label_name,
weight_column_name=weight_column_name,
enable_centered_bias=enable_centered_bias,
head_name=head_name,
thresholds=thresholds)
  def _logits_to_prediction(self, logits=None):
    # Store the raw logits; the argmax below is only for the class prediction.
    predictions = {PredictionKey.LOGITS: logits}
    logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    # Workaround for argmax dropping the second dimension.
    predictions[PredictionKey.CLASSES] = array_ops.expand_dims(
        math_ops.argmax(logits, 1), 1)
    return predictions
class _MultiLabelHead(_MultiClassHead):
"""_Head for multlabel classification."""
# TODO(zakaria): add signature and metric for multilabel.
def __init__(self, n_classes, label_name,
weight_column_name, enable_centered_bias, head_name,
thresholds):
super(_MultiLabelHead, self).__init__(
train_loss_fn=_sigmoid_cross_entropy_loss,
eval_loss_fn=_sigmoid_cross_entropy_loss,
n_classes=n_classes,
label_name=label_name,
weight_column_name=weight_column_name,
enable_centered_bias=enable_centered_bias,
head_name=head_name,
thresholds=thresholds)
  def _logits_to_prediction(self, logits=None):
    predictions = {PredictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
      predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
      logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PredictionKey.PROBABILITIES] = math_ops.sigmoid(logits)
    # Each label is predicted independently: a class is on iff its logit is
    # positive, i.e. its sigmoid probability exceeds 0.5.
    predictions[PredictionKey.CLASSES] = math_ops.to_int64(
        math_ops.greater(logits, 0))
    return predictions
def _weighted_loss(loss, weight):
"""Returns cumulative weighted loss."""
unweighted_loss = array_ops.reshape(loss, shape=(-1,))
weighted_loss = math_ops.mul(unweighted_loss,
array_ops.reshape(
weight, shape=(-1,)))
return weighted_loss
def _weight_tensor(features, weight_column_name):
if not weight_column_name:
return None
else:
return array_ops.reshape(
math_ops.to_float(features[weight_column_name]),
shape=(-1,))
def _loss(loss_unweighted, weight, name):
"""Returns loss."""
if weight is None:
loss = math_ops.reduce_mean(loss_unweighted, name=name)
return loss, loss
else:
loss_weighted = _weighted_loss(loss_unweighted, weight)
weighted_average_loss = math_ops.div(
math_ops.reduce_sum(loss_weighted),
math_ops.to_float(math_ops.reduce_sum(weight)),
name="weighted_average_loss")
loss = math_ops.reduce_mean(loss_weighted, name=name)
return loss, weighted_average_loss
def _check_logits_input_not_supported(logits, logits_input):
if logits_input is not None or logits is None:
raise NotImplementedError("logits_input is not supported yet, "
"must pass logits")
def _centered_bias(logits_dimension, weight_collection):
"""Creates and returns centered bias."""
centered_bias = variables.Variable(
array_ops.zeros([logits_dimension]),
collections=[weight_collection, ops.GraphKeys.VARIABLES],
name="centered_bias_weight")
logging_ops.scalar_summary(
["centered_bias_%d" % cb for cb in range(logits_dimension)],
array_ops.reshape(centered_bias, [-1]))
return centered_bias
def _centered_bias_step(logits_dimension, weight_collection, target,
train_loss_fn):
"""Creates and returns training op for centered bias."""
centered_bias = ops.get_collection(weight_collection)
batch_size = array_ops.shape(target)[0]
logits = array_ops.reshape(
array_ops.tile(centered_bias[0], [batch_size]),
[batch_size, logits_dimension])
with ops.name_scope(None, "centered_bias", (target, logits)):
centered_bias_loss = math_ops.reduce_mean(
train_loss_fn(logits, target), name="training_loss")
  # Learn the centered bias with an optimizer. 0.1 is a conservative learning
  # rate for a single variable.
return training.AdagradOptimizer(0.1).minimize(
centered_bias_loss, var_list=centered_bias)
def _head_prefixed(head_name, val):
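  # E.g. _head_prefixed("head1", "loss") returns "head1_loss".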
return "%s_%s" % (head_name, val) if head_name else val
# TODO(zakaria): use contrib losses.
def _mean_squared_loss(logits, target):
# To prevent broadcasting inside "-".
if len(target.get_shape()) == 1:
target = array_ops.expand_dims(target, dim=[1])
# TODO(zakaria): make sure it does not recreate the broadcast bug.
if len(logits.get_shape()) == 1:
logits = array_ops.expand_dims(logits, dim=[1])
logits.get_shape().assert_is_compatible_with(target.get_shape())
return math_ops.square(logits - math_ops.to_float(target))
def _log_loss_with_two_classes(logits, target):
# sigmoid_cross_entropy_with_logits requires [batch_size, 1] target.
if len(target.get_shape()) == 1:
target = array_ops.expand_dims(target, dim=[1])
loss_vec = nn.sigmoid_cross_entropy_with_logits(logits,
math_ops.to_float(target))
return loss_vec
def _softmax_cross_entropy_loss(logits, target):
# Check that we got int32/int64 for classification.
if (not target.dtype.is_compatible_with(dtypes.int64) and
not target.dtype.is_compatible_with(dtypes.int32)):
raise ValueError("Target's dtype should be int32, int64 or compatible. "
"Instead got %s." % target.dtype)
# sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
if len(target.get_shape()) == 2:
target = array_ops.squeeze(target, squeeze_dims=[1])
loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target)
return loss_vec
def _sigmoid_cross_entropy_loss(logits, target):
# sigmoid_cross_entropy_with_logits requires [batch_size, n_classes] target.
return nn.sigmoid_cross_entropy_with_logits(logits, math_ops.to_float(target))
def _float_weights_or_none(weights):
if weights is None:
return None
return math_ops.to_float(weights)
def _weighted_average_loss_metric_spec(loss_fn, prediction_key,
                                       label_key, weight_key):
  def _streaming_weighted_average_loss(predictions, target, weights=None):
    loss_unweighted = loss_fn(predictions, target)
    _, weighted_average_loss = _loss(loss_unweighted,
                                     weights,
                                     name="eval_loss")
    return metrics_lib.streaming_mean(weighted_average_loss)
  return metric_spec.MetricSpec(_streaming_weighted_average_loss,
                                prediction_key, label_key, weight_key)
def _target_streaming_mean(unused_predictions, target, weights=None):
return metrics_lib.streaming_mean(target, weights=weights)
def _predictions_streaming_mean(predictions, unused_target, weights=None):
return metrics_lib.streaming_mean(predictions, weights=weights)
def _streaming_auc(predictions, target, weights=None):
return metrics_lib.streaming_auc(predictions, target,
weights=_float_weights_or_none(weights))
def _accuracy_at_threshold(threshold):
def _accuracy_metric(predictions, target, weights=None):
threshold_predictions = math_ops.to_float(
math_ops.greater_equal(predictions, threshold))
return metrics_lib.streaming_accuracy(predictions=threshold_predictions,
labels=target,
weights=weights)
return _accuracy_metric
def _streaming_at_threshold(streaming_metrics_fn, threshold):
def _streaming_metrics(predictions, target, weights=None):
precision_tensor, update_op = streaming_metrics_fn(
predictions, labels=target, thresholds=[threshold],
weights=_float_weights_or_none(weights))
return array_ops.squeeze(precision_tensor), update_op
return _streaming_metrics
class PredictionKey(object):
  CLASSES = "classes"
  PROBABILITIES = "probabilities"
  LOGITS = "logits"
  LOGISTIC = "logistic"
  SCORES = "scores"
class MetricKey(object):
LOSS = "loss"
AUC = "auc"
PREDICTION_MEAN = "labels/prediction_mean"
TARGET_MEAN = "labels/actual_target_mean"
ACCURACY = "accuracy"
ACCURACY_BASELINE = "accuracy/baseline_target_mean"
ACCURACY_MEAN = "accuracy/threshold_%f_mean"
PRECISION_MEAN = "precision/positive_threshold_%f_mean"
RECALL_MEAN = "recall/positive_threshold_%f_mean"

View File

@ -0,0 +1,174 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for head.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
class RegressionModelHeadTest(tf.test.TestCase):
  # TODO(zakaria): test multilabel regression.
def testRegression(self):
head = head_lib._regression_head()
with tf.Graph().as_default(), tf.Session() as sess:
prediction = tf.constant([[1.], [1.], [3.]])
targets = tf.constant([[0.], [1.], [1.]])
model_fn_ops = head.head_ops({}, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=prediction)
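      # Expected loss: ((1-0)^2 + (1-1)^2 + (3-1)^2) / 3 = 5 / 3.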
self.assertAlmostEqual(5. / 3, sess.run(model_fn_ops.loss))
def testRegressionWithWeights(self):
head = head_lib._regression_head(
weight_column_name="label_weight")
with tf.Graph().as_default(), tf.Session() as sess:
features = {"label_weight": tf.constant([[2.], [5.], [0.]])}
prediction = tf.constant([[1.], [1.], [3.]])
targets = tf.constant([[0.], [1.], [1.]])
model_fn_ops = head.head_ops(features, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=prediction)
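      # Expected loss: (2*1 + 5*0 + 0*4) / 3 = 2 / 3.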
self.assertAlmostEqual(2. / 3, sess.run(model_fn_ops.loss), places=3)
def testErrorInSparseTensorTarget(self):
head = head_lib._regression_head()
with tf.Graph().as_default():
prediction = tf.constant([[1.], [1.], [3.]])
targets = tf.SparseTensor(
indices=tf.constant([[0, 0], [1, 0], [2, 0]], dtype=tf.int64),
values=tf.constant([0., 1., 1.]),
shape=[3, 1])
with self.assertRaisesRegexp(
ValueError, "SparseTensor is not supported as a target"):
head.head_ops({}, targets, tf.contrib.learn.ModeKeys.TRAIN, None,
logits=prediction)
class MultiClassModelHeadTest(tf.test.TestCase):
def testBinaryClassification(self):
head = head_lib._multi_class_head(n_classes=2)
with tf.Graph().as_default(), tf.Session() as sess:
logits = tf.constant([[1.], [1.]])
targets = tf.constant([[1.], [0.]])
# logloss: z:label, x:logit
# z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
model_fn_ops = head.head_ops({}, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=logits)
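      # Expected loss: (0.31326 + 1.31326) / 2 ~= 0.81326.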
self.assertAlmostEqual(.81326163, sess.run(model_fn_ops.loss))
def testErrorInSparseTensorTarget(self):
head = head_lib._multi_class_head(n_classes=2)
with tf.Graph().as_default():
prediction = tf.constant([[1.], [1.], [3.]])
targets = tf.SparseTensor(
indices=tf.constant([[0, 0], [1, 0], [2, 0]], dtype=tf.int64),
values=tf.constant([0, 1, 1]),
shape=[3, 1])
with self.assertRaisesRegexp(
ValueError, "SparseTensor is not supported as a target"):
head.head_ops({}, targets, tf.contrib.learn.ModeKeys.TRAIN, None,
logits=prediction)
def testBinaryClassificationWithWeights(self):
head = head_lib._multi_class_head(
n_classes=2, weight_column_name="label_weight")
with tf.Graph().as_default(), tf.Session() as sess:
features = {"label_weight": tf.constant([[1.], [0.]])}
logits = tf.constant([[1.], [1.]])
targets = tf.constant([[1.], [0.]])
# logloss: z:label, x:logit
# z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
model_fn_ops = head.head_ops(features, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=logits)
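      # Expected loss: (1*0.31326 + 0*1.31326) / 2 ~= 0.15663.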
self.assertAlmostEqual(.31326166 / 2, sess.run(model_fn_ops.loss))
def testMultiClass(self):
head = head_lib._multi_class_head(n_classes=3)
with tf.Graph().as_default(), tf.Session() as sess:
logits = tf.constant([[1., 0., 0.]])
targets = tf.constant([2])
      # Expected loss: sparse softmax cross-entropy,
      # -log(softmax(x)_z) = log(e^1 + e^0 + e^0) - 0 ~= 1.5514 (label z = 2).
model_fn_ops = head.head_ops({}, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=logits)
self.assertAlmostEqual(1.5514446, sess.run(model_fn_ops.loss))
def testMultiClassWithWeight(self):
head = head_lib._multi_class_head(
n_classes=3, weight_column_name="label_weight")
with tf.Graph().as_default(), tf.Session() as sess:
features = {"label_weight": tf.constant([0.1])}
logits = tf.constant([[1., 0., 0.]])
targets = tf.constant([2])
      # Expected loss: weighted sparse softmax cross-entropy,
      # 0.1 * (-log(softmax(x)_z)) / 1 ~= 0.15514 (label z = 2).
model_fn_ops = head.head_ops(features, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=logits)
self.assertAlmostEqual(.15514446, sess.run(model_fn_ops.loss))
def testMultiClassWithInvalidNClass(self):
    with self.assertRaisesRegexp(ValueError, "n_classes must be >= 2"):
      head_lib._multi_class_head(n_classes=1)
class BinarySvmModelHeadTest(tf.test.TestCase):
def testBinarySVMDefaultWeights(self):
head = head_lib._binary_svm_head()
predictions = tf.constant([[-0.5], [1.2]])
targets = tf.constant([0, 1])
model_fn_ops = head.head_ops({}, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=predictions)
    # The prediction for the first example is on the correct side of the
    # hyperplane (i.e., < 0) but within the [-1, 1] margin, incurring a loss of
    # 0.5. The second prediction is outside the margin so it incurs no loss.
    # The overall (averaged) loss is therefore 0.5 / (1 + 1) = 0.25.
with tf.Session() as sess:
self.assertAlmostEqual(0.25, sess.run(model_fn_ops.loss))
def testBinarySVMWithWeights(self):
head = head_lib._binary_svm_head(
weight_column_name="weights")
predictions = tf.constant([[-0.7], [0.2]])
targets = tf.constant([0, 1])
features = {"weights": tf.constant([2.0, 10.0])}
model_fn_ops = head.head_ops(features, targets,
tf.contrib.learn.ModeKeys.TRAIN,
None, logits=predictions)
    # Predictions for both examples are on the correct side of the hyperplane
    # but within the margin. The weighted losses are 2*0.3 = 0.6 and
    # 10*0.8 = 8, so the training loss (weighted sum over batch size) is
    # 8.6 / 2 = 4.3.
with tf.Session() as sess:
self.assertAlmostEqual(8.6 / 2, sess.run(model_fn_ops.loss), places=3)
if __name__ == "__main__":
tf.test.main()

View File

@ -34,7 +34,6 @@ from tensorflow.contrib.framework.python.ops import variables as contrib_variabl
from tensorflow.contrib.layers.python.layers import target_column
from tensorflow.contrib.learn.python.learn import evaluable
from tensorflow.contrib.learn.python.learn import metric_spec
from tensorflow.contrib.learn.python.learn import session_run_hook
from tensorflow.contrib.learn.python.learn import trainable
from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
from tensorflow.contrib.learn.python.learn.estimators import estimator
@ -53,12 +52,18 @@ from tensorflow.python.ops import nn
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import session_run_hook
from tensorflow.python.training import training as train
_CLASSES = "classes"
_LOGISTIC = "logistic"
_PROBABILITIES = "probabilities"
# The default learning rate of 0.2 is a historical artifact of the initial
# implementation, but seems a reasonable choice.
_LEARNING_RATE = 0.2
def _get_metric_args(metric):
if hasattr(metric, "__code__"):
@ -86,7 +91,7 @@ def _wrap_metric(metric):
def _get_optimizer(spec):
if isinstance(spec, six.string_types):
return layers.OPTIMIZER_CLS_NAMES[spec](
learning_rate=0.2)
learning_rate=_LEARNING_RATE)
elif callable(spec):
return spec()
return spec
@ -171,10 +176,45 @@ def _weighted_loss(loss, weight_tensor):
def _linear_classifier_model_fn(features, targets, mode, params):
"""Estimator's linear model_fn."""
"""Linear classifier model_fn.
Args:
features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
dtype `int32` or `int64` in the range `[0, n_classes)`.
mode: Defines whether this is training, evaluation or prediction.
See `ModeKeys`.
params: A dict of hyperparameters.
The following hyperparameters are expected:
* feature_columns: An iterable containing all the feature columns used by
the model.
* n_classes: number of target classes.
* weight_column_name: A string defining the weight feature column, or
None if there are no weights.
* optimizer: string, `Optimizer` object, or callable that defines the
optimizer to use for training.
* gradient_clip_norm: A float > 0. If provided, gradients are
clipped to their global norm with this clipping ratio.
* enable_centered_bias: A bool. If True, estimator will learn a centered
bias variable for each class. Rest of the model structure learns the
residual after centered bias.
* num_ps_replicas: The number of parameter server replicas.
    * joint_weights: If True, the weights for all columns will be stored in a
      single (possibly partitioned) variable. It's more efficient, but it's
      incompatible with SDCAOptimizer, and requires all feature columns to be
      sparse and to use the 'sum' combiner.
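
  A params dict for this model_fn might look like the following (a sketch
  only; the feature column and optimizer name are illustrative):

    params = {
        "feature_columns": [layers.real_valued_column("x")],
        "n_classes": 2,
        "weight_column_name": None,
        "optimizer": "Ftrl",
        "num_ps_replicas": 0,
    }
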
Returns:
predictions: A dict of `Tensor` objects.
loss: A scalar containing the loss of the step.
train_op: The op for training.
Raises:
ValueError: If mode is not any of the `ModeKeys`.
"""
feature_columns = params["feature_columns"]
n_classes = params["n_classes"]
weight_column_name = params["weight_column_name"]
feature_columns = params["feature_columns"]
optimizer = params["optimizer"]
gradient_clip_norm = params.get("gradient_clip_norm", None)
enable_centered_bias = params.get("enable_centered_bias", True)
@ -184,25 +224,24 @@ def _linear_classifier_model_fn(features, targets, mode, params):
if not isinstance(features, dict):
features = {"": features}
parent_scope = "linear"
num_label_columns = 1 if n_classes == 2 else n_classes
loss_fn = _softmax_cross_entropy_loss
if n_classes == 2:
loss_fn = _log_loss_with_two_classes
feat_values = (features.values() if isinstance(features, dict)
else [features])
partitioner = partitioned_variables.min_max_variable_partitioner(
max_partitions=num_ps_replicas,
min_slice_size=64 << 20)
with variable_scope.variable_op_scope(
feat_values, "linear", partitioner=partitioner) as scope:
features.values(), parent_scope, partitioner=partitioner) as scope:
if joint_weights:
logits, _, _ = (
layers.joint_weighted_sum_from_feature_columns(
columns_to_tensors=features,
feature_columns=feature_columns,
num_outputs=num_label_columns,
weight_collections=["linear"],
weight_collections=[parent_scope],
scope=scope))
else:
logits, _, _ = (
@ -210,7 +249,7 @@ def _linear_classifier_model_fn(features, targets, mode, params):
columns_to_tensors=features,
feature_columns=feature_columns,
num_outputs=num_label_columns,
weight_collections=["linear"],
weight_collections=[parent_scope],
scope=scope))
if enable_centered_bias:
@ -252,11 +291,39 @@ def _linear_classifier_model_fn(features, targets, mode, params):
def sdca_classifier_model_fn(features, targets, mode, params):
"""Estimator's linear model_fn."""
"""Linear classifier model_fn that uses the SDCA optimizer.
Args:
features: A dict of `Tensor` keyed by column name.
targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
dtype `int32` or `int64` in the range `[0, n_classes)`.
mode: Defines whether this is training, evaluation or prediction.
See `ModeKeys`.
params: A dict of hyperparameters.
The following hyperparameters are expected:
* feature_columns: An iterable containing all the feature columns used by
the model.
* optimizer: An `SDCAOptimizer` instance.
* weight_column_name: A string defining the weight feature column, or
None if there are no weights.
* loss_type: A string. Must be either "logistic_loss" or "hinge_loss".
* update_weights_hook: A `SessionRunHook` object or None. Used to update
model weights.
Returns:
predictions: A dict of `Tensor` objects.
loss: A scalar containing the loss of the step.
train_op: The op for training.
Raises:
ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
ValueError: If mode is not any of the `ModeKeys`.
"""
feature_columns = params["feature_columns"]
optimizer = params["optimizer"]
weight_column_name = params["weight_column_name"]
loss_type = params["loss_type"]
update_weights_hook = params.get("update_weights_hook")
if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
raise ValueError("Optimizer must be of type SDCAOptimizer")
@ -283,9 +350,12 @@ def sdca_classifier_model_fn(features, targets, mode, params):
train_op = None
if mode == estimator.ModeKeys.TRAIN:
global_step = contrib_variables.get_global_step()
train_op = optimizer.get_train_step(
columns_to_variables, weight_column_name, loss_type, features,
targets, global_step)
sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
weight_column_name,
loss_type, features,
targets, global_step)
if update_weights_hook is not None:
update_weights_hook.set_parameters(sdca_model, train_op)
predictions = {}
predictions[_LOGISTIC] = math_ops.sigmoid(logits)
@ -298,10 +368,32 @@ def sdca_classifier_model_fn(features, targets, mode, params):
# Ensures consistency with LinearComposableModel.
def _get_default_optimizer(feature_columns):
learning_rate = min(0.2, 1.0 / math.sqrt(len(feature_columns)))
learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
return train.FtrlOptimizer(learning_rate=learning_rate)
class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook):
"""SessionRunHook to update and shrink SDCA model weights."""
def __init__(self):
pass
def set_parameters(self, sdca_model, train_op):
self._sdca_model = sdca_model
self._train_op = train_op
def begin(self):
"""Construct the update_weights op.
The op is implicitly added to the default graph.
"""
self._update_op = self._sdca_model.update_weights(self._train_op)
def before_run(self, run_context):
"""Return the update_weights op so that it is executed during this run."""
return session_run_hook.SessionRunArgs(self._update_op)
class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
"""Linear classifier model.
@ -431,15 +523,23 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
self._optimizer = _get_optimizer(optimizer)
num_ps_replicas = config.num_ps_replicas if config else 0
chief_hook = None
if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
assert not _joint_weight, ("_joint_weight is incompatible with the"
" SDCAOptimizer")
model_fn = sdca_classifier_model_fn
      # We use a hook to perform the weight update and shrink step only on the
      # chief. The SdcaModel is constructed by the estimator within the call to
      # fit(), but the hook must be passed to fit() itself, so we pass the hook
      # as a model_fn parameter and have the model_fn propagate the model to
      # the hook.
chief_hook = _SdcaUpdateWeightsHook()
params = {
"feature_columns": feature_columns,
"optimizer": self._optimizer,
"weight_column_name": weight_column_name,
"loss_type": "logistic_loss",
"update_weights_hook": chief_hook,
}
else:
model_fn = _linear_classifier_model_fn
@ -461,6 +561,10 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
params=params,
feature_engineering_fn=feature_engineering_fn)
self._additional_run_hook = None
if self._estimator.config.is_chief:
self._additional_run_hook = chief_hook
def get_estimator(self):
return self._estimator
@ -468,22 +572,24 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
monitors=None, max_steps=None):
"""See trainable.Trainable."""
# TODO(roumposg): Remove when deprecated monitors are removed.
if monitors is not None:
deprecated_monitors = [
m for m in monitors
if not isinstance(m, session_run_hook.SessionRunHook)
]
for monitor in deprecated_monitors:
monitor.set_estimator(self)
monitor._lock_estimator() # pylint: disable=protected-access
if monitors is None:
monitors = []
deprecated_monitors = [
m for m in monitors
if not isinstance(m, session_run_hook.SessionRunHook)
]
for monitor in deprecated_monitors:
monitor.set_estimator(self)
monitor._lock_estimator() # pylint: disable=protected-access
if self._additional_run_hook:
monitors.append(self._additional_run_hook)
result = self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
batch_size=batch_size, monitors=monitors,
max_steps=max_steps)
if monitors is not None:
for monitor in deprecated_monitors:
monitor._unlock_estimator() # pylint: disable=protected-access
for monitor in deprecated_monitors:
monitor._unlock_estimator() # pylint: disable=protected-access
return result
@ -712,6 +818,12 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
if enable_centered_bias is None:
enable_centered_bias = True
dnn_linear_combined._changing_default_center_bias() # pylint: disable=protected-access
if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
enable_centered_bias = False
logging.warning("centered_bias is not supported with SDCA, "
"please disable it explicitly.")
self._weight_column_name = weight_column_name
self._joint_weights = _joint_weights
super(LinearRegressor, self).__init__(
model_dir=model_dir,
@ -737,20 +849,22 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
layers.weighted_sum_from_feature_columns(
columns_to_tensors=features,
feature_columns=self._linear_feature_columns,
num_outputs=self._target_column.num_label_columns,
num_outputs=self._head.logits_dimension,
weight_collections=[self._linear_model.get_scope_name()],
scope=self._linear_model.get_scope_name()))
with ops.control_dependencies([self._centered_bias()]):
loss = self._target_column.loss(logits, targets, features)
logging_ops.scalar_summary("loss", loss)
_add_bias_column(self._linear_feature_columns, features, bias, targets,
columns_to_variables)
_add_bias_column(self._linear_feature_columns, features, bias, targets,
columns_to_variables)
def _train_op_fn(unused_loss):
sdca_model, train_op = self._linear_optimizer.get_train_step(
columns_to_variables, self._weight_column_name,
self._loss_type(), features, targets, global_step)
return sdca_model.update_weights(train_op)
train_op = self._linear_optimizer.get_train_step(
columns_to_variables, self._target_column.weight_column_name,
self._loss_type(), features, targets, global_step)
return train_op, loss
model_fn_ops = self._head.head_ops(features, targets,
estimator.ModeKeys.TRAIN, _train_op_fn,
logits=logits)
return model_fn_ops.training_op, model_fn_ops.loss
def _loss_type(self):
return "squared_loss"

View File

@ -257,10 +257,11 @@ class LinearClassifierTest(tf.test.TestCase):
def testCustomMetrics(self):
"""Tests custom evaluation metrics."""
def _input_fn_train():
def _input_fn(num_epochs=None):
# Create 4 rows, one of them (y = x), three of them (y=Not(x))
target = tf.constant([[1], [0], [0], [0]], dtype=tf.float32)
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)}
features = {'x': tf.train.limit_epochs(
tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)}
return features, target
def _my_metric_op(predictions, targets):
@ -272,9 +273,9 @@ class LinearClassifierTest(tf.test.TestCase):
classifier = tf.contrib.learn.LinearClassifier(
feature_columns=[tf.contrib.layers.real_valued_column('x')])
classifier.fit(input_fn=_input_fn_train, steps=100)
classifier.fit(input_fn=_input_fn, steps=100)
scores = classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={
'my_accuracy': MetricSpec(
@ -289,7 +290,8 @@ class LinearClassifierTest(tf.test.TestCase):
self.assertTrue(
set(['loss', 'my_accuracy', 'my_precision', 'my_metric'
]).issubset(set(scores.keys())))
predictions = classifier.predict(input_fn=_input_fn_train)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(classifier.predict(input_fn=predict_input_fn)))
self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions),
scores['my_accuracy'])
@ -297,14 +299,14 @@ class LinearClassifierTest(tf.test.TestCase):
# "probabilities".
with self.assertRaises(ValueError):
classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={('bad_name', 'bad_type'): tf.contrib.metrics.streaming_auc})
# Test the case where the tuple of the key doesn't have 2 elements.
with self.assertRaises(ValueError):
classifier.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=100,
metrics={
('bad_length_name', 'classes', 'bad_length'):
@ -987,10 +989,11 @@ class LinearRegressorTest(tf.test.TestCase):
def testCustomMetrics(self):
"""Tests custom evaluation metrics."""
def _input_fn_train():
def _input_fn(num_epochs=None):
# Create 4 rows, one of them (y = x), three of them (y=Not(x))
target = tf.constant([[1.], [0.], [0.], [0.]])
features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
features = {'x': tf.train.limit_epochs(
tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)}
return features, target
def _my_metric_op(predictions, targets):
@ -1000,9 +1003,9 @@ class LinearRegressorTest(tf.test.TestCase):
feature_columns=[tf.contrib.layers.real_valued_column('x')],
config=tf.contrib.learn.RunConfig(tf_random_seed=1))
regressor.fit(input_fn=_input_fn_train, steps=100)
regressor.fit(input_fn=_input_fn, steps=100)
scores = regressor.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=1,
metrics={
'my_error': tf.contrib.metrics.streaming_mean_squared_error,
@ -1011,15 +1014,16 @@ class LinearRegressorTest(tf.test.TestCase):
self.assertIn('loss', set(scores.keys()))
self.assertIn('my_error', set(scores.keys()))
self.assertIn('my_metric', set(scores.keys()))
predictions = regressor.predict(input_fn=_input_fn_train)
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(regressor.predict(input_fn=predict_input_fn)))
self.assertAlmostEqual(
_sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
scores['my_error'])
# Tests that when the key is a tuple, an error is raised.
with self.assertRaises(TypeError):
with self.assertRaises(KeyError):
regressor.evaluate(
input_fn=_input_fn_train,
input_fn=_input_fn,
steps=1,
metrics={('my_error', 'predictions'
): tf.contrib.metrics.streaming_mean_squared_error})

View File

@ -39,7 +39,7 @@ class MultiOutputTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(x),
target_dimension=2)
regressor.fit(x, y, steps=100)
score = mean_squared_error(regressor.predict(x), y)
score = mean_squared_error(np.array(list(regressor.predict(x))), y)
self.assertLess(score, 10, "Failed with score = {0}".format(score))

View File

@ -114,7 +114,7 @@ class TensorForestEstimator(estimator.BaseEstimator):
as_iterable=False)
def predict_proba(
self, x=None, input_fn=None, batch_size=None, outputs=None,
as_iterable=False):
as_iterable=True):
"""Returns prediction probabilities for given features (classification).
Args:
@ -148,7 +148,7 @@ class TensorForestEstimator(estimator.BaseEstimator):
as_iterable=False)
def predict(
self, x=None, input_fn=None, axis=None, batch_size=None, outputs=None,
as_iterable=False):
as_iterable=True):
"""Returns predictions for given features.
Args:

View File

@ -130,6 +130,8 @@ class RunConfig(object):
# If not explicitly specified in the constructor and the TF_CONFIG
# environment variable is present, load cluster_spec from TF_CONFIG.
config = json.loads(os.environ.get('TF_CONFIG') or '{}')
environment = config.get('environment', 'local')
if not cluster_spec and 'cluster' in config:
cluster_spec = ClusterSpec(config['cluster'])
self.cluster_spec = cluster_spec
@ -138,6 +140,7 @@ class RunConfig(object):
# otherwise, if the TF_CONFIG environment variable is present, use that.
# Otherwise, use the respective default (None / 0).
task_env = config.get('task', {})
self._job_name = job_name or task_env.get('type') or None
self.task = task if task is not None else task_env.get('index') or 0
@ -151,11 +154,13 @@ class RunConfig(object):
self.num_ps_replicas = num_ps_replicas or _count_ps(self.cluster_spec) or 0
# Set is_chief.
# TODO(b/32117298): cleanup environment-specific logic for setting is_chief
# once the environments have been further unified.
self._is_chief = is_chief
if self._is_chief is None:
if not self._job_name:
self._is_chief = (self.task == 0)
elif config:
elif config and environment == 'cloud':
# When the TF_CONFIG environment variable is set, we can set the
# default of is_chief to 0 when job_name is "master" and task is 0.
self._is_chief = (self._job_name == 'master' and self.task == 0)
@ -176,11 +181,19 @@ class RunConfig(object):
'job_name is \'%s\', but only masters or workers may be chiefs. '
'Please check is_chief and job_name, which may have been set in '
'TF_CONFIG environment variable.' % (self._job_name,))
elif (self._is_chief is False and self._job_name == 'master' and
self.task == 0):
raise ValueError(
'Master task 0 must be chief. Please check is_chief, job_name, and '
'task, which may have been set in TF_CONFIG environment variable.')
elif self._is_chief is False:
if environment == 'cloud':
if self._job_name == 'master' and self.task == 0:
raise ValueError(
'Master task 0 must be chief for cloud. Please check is_chief, '
'job_name, and task, which may have been set in TF_CONFIG '
'environment variable.')
else:
if self._job_name == 'worker' and self.task == 0:
raise ValueError(
'Worker task 0 must be chief. Please check is_chief, job_name, '
'and task, which may have been set in TF_CONFIG environment '
'variable.')
self.evaluation_master = evaluation_master or ''

View File

@ -189,20 +189,47 @@ class RunConfigTest(tf.test.TestCase):
# Basically, just make sure no exception is being raised.
self.assertEquals(config.num_ps_replicas, 2)
def test_is_chief_from_tf_config(self):
def test_is_chief_from_cloud_tf_config(self):
# is_chief should be true when ["task"]["type"] == "master" and
# index == 0. Note that test_values_from_tf_config covers the
# non-master case.
# index == 0 and ["task"]["environment"] == "cloud". Note that
# test_values_from_tf_config covers the non-master case.
tf_config = {"cluster": {"ps": ["host1:1", "host2:2"],
"master": ["host3:3"],
"worker": ["host4:4", "host5:5", "host6:6"]},
"task": {"type": "master",
"index": 0}}
"index": 0},
"environment": "cloud"}
with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
config = run_config.RunConfig()
self.assertTrue(config.is_chief)
def test_is_chief_from_noncloud_tf_config(self):
# is_chief should be true when ["task"]["type"] == "worker" and
# index == 0 if ["task"]["environment"] != "cloud".
tf_config = {"cluster": {"ps": ["host1:1", "host2:2"],
"master": ["host3:3"],
"worker": ["host4:4", "host5:5", "host6:6"]},
"task": {"type": "worker",
"index": 0},
"environment": "random"}
with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
config = run_config.RunConfig()
self.assertTrue(config.is_chief)
# But task 0 for a job named "master" should not be.
tf_config = {"cluster": {"ps": ["host1:1", "host2:2"],
"master": ["host3:3"],
"worker": ["host4:4", "host5:5", "host6:6"]},
"task": {"type": "master",
"index": 0},
"environment": "random"}
with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
config = run_config.RunConfig()
self.assertFalse(config.is_chief)
def test_default_is_chief_from_tf_config_without_job_name(self):
tf_config = {"cluster": {},
"task": {}}
@ -245,8 +272,15 @@ class RunConfigTest(tf.test.TestCase):
with self.assertRaisesRegexp(ValueError, msg):
run_config.RunConfig(is_chief=True, task=0, job_name="ps")
with self.assertRaisesRegexp(ValueError, "Master task 0 must be chief"):
run_config.RunConfig(is_chief=False, task=0, job_name="master")
msg = "Master task 0 must be chief for cloud"
with self.assertRaisesRegexp(ValueError, msg):
tf_config = {"environment": "cloud"}
with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}):
run_config.RunConfig(is_chief=False, task=0, job_name="master")
msg = "Worker task 0 must be chief"
with self.assertRaisesRegexp(ValueError, msg):
run_config.RunConfig(is_chief=False, task=0, job_name="worker")
if __name__ == "__main__":

View File

@ -55,7 +55,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
method), should be set to (#concurrent train ops per worker) x (#workers). If
num_loss_partitions is larger or equal to this value, convergence is
guaranteed but becomes slower as num_loss_partitions increases. If it is set
to a smaller value, the optimizer is more agressive in reducing the global
to a smaller value, the optimizer is more aggressive in reducing the global
loss but convergence is not guaranteed. The recommended value in tf.learn
(where there is one process per worker) is the number of workers running the
train steps. It defaults to 1 (single machine).
@ -146,6 +146,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
self._feature_columns = feature_columns
self._model_dir = model_dir or tempfile.mkdtemp()
self._chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access
self._estimator = estimator.Estimator(
model_fn=linear.sdca_classifier_model_fn,
model_dir=self._model_dir,
@ -155,12 +156,19 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
"optimizer": self._optimizer,
"weight_column_name": weight_column_name,
"loss_type": "hinge_loss",
"update_weights_hook": self._chief_hook,
},
feature_engineering_fn=feature_engineering_fn)
if not self._estimator.config.is_chief:
self._chief_hook = None
def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None,
monitors=None, max_steps=None):
"""See trainable.Trainable."""
if monitors is None:
monitors = []
if self._chief_hook:
monitors.append(self._chief_hook)
return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps,
batch_size=batch_size, monitors=monitors,
max_steps=max_steps)

View File

@ -223,6 +223,7 @@ class Experiment(object):
logging.info("Waiting %f secs before starting eval.", delay_secs)
time.sleep(delay_secs)
last_fitted_error_time = 0
while True:
start = time.time()
try:
@ -231,7 +232,13 @@ class Experiment(object):
metrics=self._eval_metrics,
name=name)
except NotFittedError:
logging.warning("Estimator is not fitted yet, skipping evaluation.")
# Print warning message every 10 mins.
if time.time() - last_fitted_error_time > 600:
logging.warning(
"Estimator is not fitted yet. "
"Will start an evaluation when a checkpoint will be ready.")
last_fitted_error_time = time.time()
duration = time.time() - start
if duration < throttle_delay_secs:
difference = throttle_delay_secs - duration

View File

@ -126,6 +126,7 @@ def _monitored_train(graph,
supervisor_save_model_secs=600,
supervisor_save_model_steps=None,
keep_checkpoint_max=5,
supervisor_save_summaries_secs=None,
supervisor_save_summaries_steps=100,
feed_fn=None,
steps=None,
@ -164,7 +165,7 @@ def _monitored_train(graph,
current loss. A `0` or negative value disables logging.
supervisor_is_chief: Whether the current process is the chief supervisor in
charge of restoring the model and running standard services.
supervisor_master: The master string to use when preparing the session.
supervisor_master: The master string to use when preparing the session.
supervisor_save_model_secs: Save checkpoints every this many seconds. Can
not be specified with `supervisor_save_model_steps`.
supervisor_save_model_steps: Save checkpoints every this many steps. Can not
@ -173,8 +174,12 @@ def _monitored_train(graph,
keep. As new files are created, older files are deleted. If None or 0,
all checkpoint files are kept. This is simply passed as the max_to_keep
arg to `tf.Saver` constructor.
supervisor_save_summaries_secs: Save summaries every
`supervisor_save_summaries_secs` seconds when training.
supervisor_save_summaries_steps: Save summaries every
`supervisor_save_summaries_steps` seconds when training.
      `supervisor_save_summaries_steps` steps when training. Exactly one of
      `supervisor_save_summaries_steps` and `supervisor_save_summaries_secs`
      should be specified, and the other should be None.
feed_fn: A function that is called every iteration to produce a `feed_dict`
passed to `session.run` calls. Optional.
steps: Trains for this many steps (e.g. current global step + `steps`).
@ -267,6 +272,7 @@ def _monitored_train(graph,
summary_writer=summary_writer))
all_hooks.append(
basic_session_run_hooks.SummarySaverHook(
save_secs=supervisor_save_summaries_secs,
save_steps=supervisor_save_summaries_steps,
summary_writer=summary_writer,
scaffold=scaffold))

View File

@ -28,15 +28,33 @@ from tensorflow.python.ops import io_ops
from tensorflow.python.ops import logging_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import variables as var_ops
from tensorflow.python.platform import gfile
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import input as input_ops
from tensorflow.python.training import queue_runner
# Default name for key in the feature dict.
KEY_FEATURE_NAME = '__key__'
def _check_enqueue_params(num_queue_runners, num_enqueue_threads):
"""Check enqueue paramerters for deprecation of `num_queue_runners`."""
if num_queue_runners is not None:
# TODO(yifanchen): Remove on Nov 21 2016.
logging.warning('`num_queue_runners` is deprecated, it will be removed on '
'Nov 21 2016')
if num_enqueue_threads is not None:
raise ValueError('`num_queue_runners` and `num_enqueue_threads` can not '
'both be set.')
elif num_enqueue_threads is None:
logging.warning('Default behavior will change and `num_queue_runners` '
'will be replaced by `num_enqueue_threads`.')
num_queue_runners = 2
return num_queue_runners, num_enqueue_threads
def read_batch_examples(file_pattern, batch_size, reader,
randomize_input=True, num_epochs=None,
queue_capacity=10000, num_threads=1,
@ -134,7 +152,107 @@ def read_keyed_batch_examples(
Raises:
ValueError: for invalid inputs.
"""
# Retrieve files to read.
return _read_keyed_batch_examples_helper(
file_pattern,
batch_size,
reader,
randomize_input,
num_epochs,
queue_capacity,
num_threads,
read_batch_size,
parse_fn,
setup_shared_queue=False,
name=name)
def _read_keyed_batch_examples_shared_queue(file_pattern,
batch_size,
reader,
randomize_input=True,
num_epochs=None,
queue_capacity=10000,
num_threads=1,
read_batch_size=1,
parse_fn=None,
name=None):
"""Adds operations to read, queue, batch `Example` protos.
  Given file pattern (or list of files), this sets up a shared queue for file
  names, sets up a worker queue that pulls from the shared queue, reads
  `Example` protos using the provided `reader`, and uses a batch queue to
  create batches of examples of size `batch_size`. This provides at-most-once
  visit guarantees. Note that this only works if the parameter servers are not
  pre-empted or restarted and the session is not restored from a checkpoint,
  since the state of a queue is not checkpointed and we would end up restarting
  from the entire list of files.
All queue runners are added to the queue runners collection, and may be
started via `start_queue_runners`.
All ops are added to the default graph.
Use `parse_fn` if you need to do parsing / processing on single examples.
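
  A minimal usage sketch (the file pattern and reader here are illustrative,
  not prescribed by this change):

    keys, examples = _read_keyed_batch_examples_shared_queue(
        file_pattern='/tmp/data/part-*',
        batch_size=32,
        reader=io_ops.TFRecordReader,
        num_epochs=1)
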
Args:
file_pattern: List of files or pattern of file paths containing
`Example` records. See `tf.gfile.Glob` for pattern rules.
batch_size: An int or scalar `Tensor` specifying the batch size to use.
reader: A function or class that returns an object with
`read` method, (filename tensor) -> (example tensor).
randomize_input: Whether the input should be randomized.
num_epochs: Integer specifying the number of times to read through the
dataset. If `None`, cycles through the dataset forever.
NOTE - If specified, creates a variable that must be initialized, so call
`tf.initialize_all_variables()` as shown in the tests.
queue_capacity: Capacity for input queue.
num_threads: The number of threads enqueuing examples.
read_batch_size: An int or scalar `Tensor` specifying the number of
records to read at once
parse_fn: Parsing function, takes `Example` Tensor returns parsed
representation. If `None`, no parsing is done.
name: Name of resulting op.
Returns:
Returns tuple of:
- `Tensor` of string keys.
- String `Tensor` of batched `Example` proto.
Raises:
ValueError: for invalid inputs.
"""
return _read_keyed_batch_examples_helper(
file_pattern,
batch_size,
reader,
randomize_input,
num_epochs,
queue_capacity,
num_threads,
read_batch_size,
parse_fn,
setup_shared_queue=True,
name=name)
def _get_shared_file_name_queue(file_names, shuffle, num_epochs, name):
  # Creating a dummy variable so we can put the shared queue on a PS if there
  # is one, and on the worker otherwise. TODO(rohanj): Figure out how to place
  # an op on the PS without this hack.
with ops.Graph().as_default():
dummy_var = var_ops.Variable(initial_value=0, name='dummy_var')
with ops.device(dummy_var.device):
shared_file_name_queue = input_ops.string_input_producer(
constant_op.constant(
file_names, name='input'),
shuffle=shuffle,
num_epochs=num_epochs,
name=name,
shared_name=name)
return shared_file_name_queue
def _get_file_names(file_pattern, randomize_input):
if isinstance(file_pattern, list):
file_names = file_pattern
if not file_names:
@ -148,6 +266,46 @@ def read_keyed_batch_examples(
# in `string_input_producer` if `randomize_input` is enabled.
if not randomize_input:
file_names = sorted(file_names)
return file_names
def _get_examples(file_name_queue, reader, num_threads, read_batch_size,
parse_fn):
with ops.name_scope('read'):
example_list = []
for _ in range(num_threads):
if read_batch_size > 1:
keys, examples_proto = reader().read_up_to(file_name_queue,
read_batch_size)
else:
keys, examples_proto = reader().read(file_name_queue)
if parse_fn:
parsed_examples = parse_fn(examples_proto)
# Map keys into example map because batch_join doesn't support
# tuple of Tensor + dict.
if isinstance(parsed_examples, dict):
parsed_examples[KEY_FEATURE_NAME] = keys
example_list.append(parsed_examples)
else:
example_list.append((keys, parsed_examples))
else:
example_list.append((keys, examples_proto))
return example_list
def _read_keyed_batch_examples_helper(file_pattern,
batch_size,
reader,
randomize_input=True,
num_epochs=None,
queue_capacity=10000,
num_threads=1,
read_batch_size=1,
parse_fn=None,
setup_shared_queue=False,
name=None):
# Retrieve files to read.
file_names = _get_file_names(file_pattern, randomize_input)
# Check input parameters are given and reasonable.
if (not queue_capacity) or (queue_capacity <= 0):
@ -168,33 +326,25 @@ def read_keyed_batch_examples(
raise ValueError('Invalid num_epochs %s.' % num_epochs)
with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
# Setup filename queue with shuffling.
with ops.name_scope('file_name_queue') as file_name_queue_scope:
file_name_queue = input_ops.string_input_producer(
constant_op.constant(file_names, name='input'),
shuffle=randomize_input, num_epochs=num_epochs,
name=file_name_queue_scope)
if setup_shared_queue:
shared_file_name_queue = _get_shared_file_name_queue(
file_names, randomize_input, num_epochs, file_name_queue_scope)
file_name_queue = data_flow_ops.FIFOQueue(
capacity=1, dtypes=[dtypes.string], shapes=[[]])
enqueue_op = file_name_queue.enqueue(shared_file_name_queue.dequeue())
queue_runner.add_queue_runner(
queue_runner.QueueRunner(file_name_queue, [enqueue_op]))
else:
file_name_queue = input_ops.string_input_producer(
constant_op.constant(
file_names, name='input'),
shuffle=randomize_input,
num_epochs=num_epochs,
name=file_name_queue_scope)
# Create readers, one per thread and set them to read from filename queue.
with ops.name_scope('read'):
example_list = []
for _ in range(num_threads):
if read_batch_size > 1:
keys, examples_proto = reader().read_up_to(file_name_queue,
read_batch_size)
else:
keys, examples_proto = reader().read(file_name_queue)
if parse_fn:
parsed_examples = parse_fn(examples_proto)
# Map keys into example map because batch_join doesn't support
# tuple of Tensor + dict.
if isinstance(parsed_examples, dict):
parsed_examples[KEY_FEATURE_NAME] = keys
example_list.append(parsed_examples)
else:
example_list.append((keys, parsed_examples))
else:
example_list.append((keys, examples_proto))
example_list = _get_examples(file_name_queue, reader, num_threads,
read_batch_size, parse_fn)
enqueue_many = read_batch_size > 1
@ -234,7 +384,8 @@ def read_keyed_batch_features(file_pattern,
queue_capacity=10000,
reader_num_threads=1,
feature_queue_capacity=100,
num_queue_runners=2,
num_queue_runners=None,
num_enqueue_threads=None,
parse_fn=None,
name=None):
"""Adds operations to read, queue, batch and parse `Example` protos.
@ -265,10 +416,17 @@ def read_keyed_batch_features(file_pattern,
queue_capacity: Capacity for input queue.
reader_num_threads: The number of threads to read examples.
feature_queue_capacity: Capacity of the parsed features queue.
num_queue_runners: Number of queue runners to start for the feature queue,
Adding multiple queue runners for the parsed example queue helps maintain
num_queue_runners: Deprecated. Defaults to 2 if this and
`num_enqueue_threads` are both `None`. This is the number of queue
runners to start for the feature queue. Adding multiple queue runners for
the parsed example queue helps maintain a full queue when the subsequent
computations overall are cheaper than parsing. This argument will be
deprecated and replaced with `num_enqueue_threads`.
num_enqueue_threads: Number of threads to enqueue the parsed example queue.
Using multiple threads to enqueue the parsed example queue helps maintain
a full queue when the subsequent computations overall are cheaper than
parsing.
parsing. This argument will replace `num_queue_runners`. This and
`num_queue_runners` can not both be set.
parse_fn: Parsing function that takes an `Example` Tensor and returns a parsed
representation. If `None`, no parsing is done.
name: Name of resulting op.
@ -282,6 +440,9 @@ def read_keyed_batch_features(file_pattern,
ValueError: for invalid inputs.
"""
num_queue_runners, num_enqueue_threads = _check_enqueue_params(
num_queue_runners, num_enqueue_threads)
with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
keys, examples = read_keyed_batch_examples(
file_pattern, batch_size, reader, randomize_input=randomize_input,
@ -290,6 +451,88 @@ def read_keyed_batch_features(file_pattern,
parse_fn=parse_fn, name=scope)
# Parse the example.
feature_map = parsing_ops.parse_example(examples, features)
return queue_parsed_features(
feature_map,
keys=keys,
feature_queue_capacity=feature_queue_capacity,
num_queue_runners=num_queue_runners,
num_enqueue_threads=num_enqueue_threads,
name=scope)
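The body of `_check_enqueue_params` is not shown in this diff. A plausible reconstruction, consistent with the docstring (defaults to 2 queue runners when neither argument is given) and with the "can not both be set" error the tests below expect:

```python
def _check_enqueue_params(num_queue_runners, num_enqueue_threads):
  """Hypothetical sketch; the real body is outside this diff."""
  if num_queue_runners is None and num_enqueue_threads is None:
    num_queue_runners = 2  # documented default
  elif num_queue_runners is not None and num_enqueue_threads is not None:
    raise ValueError('`num_queue_runners` and `num_enqueue_threads` can not '
                     'both be set.')
  return num_queue_runners, num_enqueue_threads
```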
def _read_keyed_batch_features_shared_queue(file_pattern,
batch_size,
features,
reader,
randomize_input=True,
num_epochs=None,
queue_capacity=10000,
reader_num_threads=1,
feature_queue_capacity=100,
num_queue_runners=2,
parse_fn=None,
name=None):
"""Adds operations to read, queue, batch and parse `Example` protos.
Given a file pattern (or list of files), this will set up a shared queue for
file names, set up a worker queue that gets file names from the shared queue,
read `Example` protos using the provided `reader`, use a batch queue to create
batches of examples of size `batch_size`, and parse examples given the
`features` specification.
All queue runners are added to the queue runners collection, and may be
started via `start_queue_runners`.
All ops are added to the default graph.
Args:
file_pattern: List of files or pattern of file paths containing
`Example` records. See `tf.gfile.Glob` for pattern rules.
batch_size: An int or scalar `Tensor` specifying the batch size to use.
features: A `dict` mapping feature keys to `FixedLenFeature` or
`VarLenFeature` values.
reader: A function or class that returns an object with a `read` method,
(filename tensor) -> (example tensor).
randomize_input: Whether the input should be randomized.
num_epochs: Integer specifying the number of times to read through the
dataset. If `None`, cycles through the dataset forever. NOTE - If specified,
creates a variable that must be initialized, so call
`tf.initialize_local_variables()` as shown in the tests.
queue_capacity: Capacity for input queue.
reader_num_threads: The number of threads to read examples.
feature_queue_capacity: Capacity of the parsed features queue.
num_queue_runners: Number of queue runners to start for the feature queue.
Adding multiple queue runners for the parsed example queue helps maintain
a full queue when the subsequent computations overall are cheaper than
parsing.
parse_fn: Parsing function that takes an `Example` Tensor and returns a parsed
representation. If `None`, no parsing is done.
name: Name of resulting op.
Returns:
A tuple of:
- `Tensor` of string keys.
- A dict of `Tensor` or `SparseTensor` objects for each key in `features`.
Raises:
ValueError: for invalid inputs.
"""
with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
keys, examples = _read_keyed_batch_examples_shared_queue(
file_pattern,
batch_size,
reader,
randomize_input=randomize_input,
num_epochs=num_epochs,
queue_capacity=queue_capacity,
num_threads=reader_num_threads,
read_batch_size=batch_size,
parse_fn=parse_fn,
name=scope)
# Parse the example.
feature_map = parsing_ops.parse_example(examples, features)
return queue_parsed_features(
feature_map,
keys=keys,
@ -301,7 +544,8 @@ def read_keyed_batch_features(file_pattern,
def queue_parsed_features(parsed_features,
keys=None,
feature_queue_capacity=100,
num_queue_runners=2,
num_queue_runners=None,
num_enqueue_threads=None,
name=None):
"""Speeds up parsing by using queues to do it asynchronously.
@ -320,10 +564,17 @@ def queue_parsed_features(parsed_features,
parsed_features: A dict mapping string keys to `Tensor` or `SparseTensor`
objects.
keys: `Tensor` of string keys.
feature_queue_capacity: Capacity of the parsed features queue.
num_queue_runners: Number of queue runners to start for the feature queue,
Adding multiple queue runners for the parsed example queue helps maintain
num_queue_runners: Deprecated. Defaults to 2 if this and
`num_enqueue_threads` are both `None`. This is the number of queue
runners to start for the feature queue. Adding multiple queue runners for
the parsed example queue helps maintain a full queue when the subsequent
computations overall are cheaper than parsing. This argument will be
deprecated and replaced with `num_enqueue_threads`.
num_enqueue_threads: Number of threads to enqueue the parsed example queue.
Using multiple threads to enqueue the parsed example queue helps maintain
a full queue when the subsequent computations overall are cheaper than
parsing.
parsing. This argument will replace `num_queue_runners`. This and
`num_queue_runners` can not both be set.
name: Name of resulting op.
Returns:
@ -331,7 +582,12 @@ def queue_parsed_features(parsed_features,
- `Tensor` corresponding to `keys` if provided, otherwise `None`.
- A dict of string key to `Tensor` or `SparseTensor` objects corresponding
to `parsed_features`.
Raises:
ValueError: for invalid inputs.
"""
num_queue_runners, num_enqueue_threads = _check_enqueue_params(
num_queue_runners, num_enqueue_threads)
args = list(parsed_features.values())
if keys is not None:
args += [keys]
@ -370,12 +626,31 @@ def queue_parsed_features(parsed_features,
# Add multiple queue runners so that the queue is always full. Adding more
# than two queue-runners may hog the cpu on the worker to fill up the queue.
for _ in range(num_queue_runners):
queue_runner.add_queue_runner(
queue_runner.QueueRunner(
input_queue, [input_queue.enqueue(tensors_to_enqueue)],
queue_closed_exception_types=(errors.OutOfRangeError,
errors.CancelledError)))
#
# Note: this can result in a large last batch being lost as the multiple
# runner threads do not coordinate with each other. Please use
# `num_enqueue_threads` instead.
if num_queue_runners is not None:
for _ in range(num_queue_runners):
queue_runner.add_queue_runner(
queue_runner.QueueRunner(
input_queue, [input_queue.enqueue(tensors_to_enqueue)],
queue_closed_exception_types=(errors.OutOfRangeError,
errors.CancelledError)))
# Use a single QueueRunner with multiple threads to enqueue so the queue is
# always full. The threads are coordinated so the last batch will not be
# lost.
elif num_enqueue_threads is not None:
enqueue_ops = [input_queue.enqueue(tensors_to_enqueue)
for _ in range(num_enqueue_threads)]
queue_runner.add_queue_runner(queue_runner.QueueRunner(
input_queue, enqueue_ops,
queue_closed_exception_types=(errors.OutOfRangeError,
errors.CancelledError)))
else:
raise AssertionError(
'Either `num_queue_runners` or `num_enqueue_threads` should have '
'been set.')
dequeued_tensors = input_queue.dequeue()
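In isolation, the coordinated pattern the `num_enqueue_threads` branch builds looks like the following sketch (the queue dtype, capacity, and enqueued value are hypothetical, not from this change):

```python
import tensorflow as tf

# One QueueRunner owning several enqueue ops: its threads are coordinated
# through a single runner, so closing the queue does not strand a partially
# filled last batch the way independent queue runners can.
queue = tf.FIFOQueue(capacity=100, dtypes=[tf.float32], shapes=[[]])
value = tf.random_uniform([])
enqueue_ops = [queue.enqueue(value) for _ in range(4)]  # 4 enqueue threads
tf.train.add_queue_runner(
    tf.train.QueueRunner(
        queue, enqueue_ops,
        queue_closed_exception_types=(tf.errors.OutOfRangeError,
                                      tf.errors.CancelledError)))
```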

View File

@ -19,13 +19,17 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import os
import random
import tempfile
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.learn_io.graph_io import _read_keyed_batch_examples_shared_queue
from tensorflow.python.framework import errors
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.platform import gfile
@ -243,6 +247,63 @@ class GraphIOTest(tf.test.TestCase):
coord.request_stop()
def test_read_keyed_batch_features_mutual_exclusive_args(self):
filename = self._create_temp_file("abcde")
features = {"sequence": tf.FixedLenFeature([], tf.string)}
with self.assertRaisesRegexp(ValueError, "can not both be set"):
_, _ = tf.contrib.learn.read_keyed_batch_features(
filename, 1, features, tf.TextLineReader, randomize_input=False,
num_queue_runners=2, num_enqueue_threads=2)
def test_queue_parsed_features_mutual_exclusive_args(self):
parsed_features = {"a": tf.constant([10, 20, 30])}
with self.assertRaisesRegexp(ValueError, "can not both be set"):
_, _ = tf.contrib.learn.queue_parsed_features(
parsed_features, num_queue_runners=2, num_enqueue_threads=2)
def test_read_text_lines_large(self):
gfile.Glob = self._orig_glob
sequence_prefix = "abcdefghijklmnopqrstuvwxyz123456789"
num_records = 49999
lines = ["".join([sequence_prefix, str(l)]).encode("ascii")
for l in xrange(num_records)]
json_lines = ["".join(['{"features": { "feature": { "sequence": {',
'"bytes_list": { "value": ["',
base64.b64encode(l).decode("ascii"),
'"]}}}}}\n']) for l in lines]
filename = self._create_temp_file("".join(json_lines))
batch_size = 10000
queue_capacity = 10000
name = "my_large_batch"
features = {"sequence": tf.FixedLenFeature([], tf.string)}
with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
_, result = tf.contrib.learn.read_keyed_batch_features(
filename, batch_size, features, tf.TextLineReader,
randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
num_enqueue_threads=2, parse_fn=tf.decode_json_example, name=name)
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(session, coord=coord)
data = []
try:
while not coord.should_stop():
data.append(session.run(result))
except errors.OutOfRangeError:
pass
finally:
coord.request_stop()
coord.join(threads)
parsed_records = [item for sublist in [d["sequence"] for d in data]
for item in sublist]
# Check that the number of records matches expected and all records
# are present.
self.assertEqual(len(parsed_records), num_records)
self.assertEqual(set(parsed_records), set(lines))
def test_read_text_lines_multifile(self):
gfile.Glob = self._orig_glob
filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"])
@ -261,6 +322,18 @@ class GraphIOTest(tf.test.TestCase):
coord = tf.train.Coordinator()
tf.train.start_queue_runners(session, coord=coord)
self.assertEqual("%s:1" % name, inputs.name)
file_name_queue_name = "%s/file_name_queue" % name
file_names_name = "%s/input" % file_name_queue_name
example_queue_name = "%s/fifo_queue" % name
test_util.assert_ops_in_graph({
file_names_name: "Const",
file_name_queue_name: "FIFOQueue",
"%s/read/TextLineReader" % name: "TextLineReader",
example_queue_name: "FIFOQueue",
name: "QueueDequeueUpTo"
}, g)
self.assertAllEqual(session.run(inputs), [b"ABC"])
self.assertAllEqual(session.run(inputs), [b"DEF"])
self.assertAllEqual(session.run(inputs), [b"GHK"])
@ -269,6 +342,120 @@ class GraphIOTest(tf.test.TestCase):
coord.request_stop()
def test_read_text_lines_multifile_with_shared_queue(self):
gfile.Glob = self._orig_glob
filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"])
batch_size = 1
queue_capacity = 5
name = "my_batch"
with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
_, inputs = _read_keyed_batch_examples_shared_queue(
filenames,
batch_size,
reader=tf.TextLineReader,
randomize_input=False,
num_epochs=1,
queue_capacity=queue_capacity,
name=name)
session.run(tf.initialize_local_variables())
coord = tf.train.Coordinator()
tf.train.start_queue_runners(session, coord=coord)
self.assertEqual("%s:1" % name, inputs.name)
shared_file_name_queue_name = "%s/file_name_queue" % name
file_names_name = "%s/input" % shared_file_name_queue_name
example_queue_name = "%s/fifo_queue" % name
worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name
test_util.assert_ops_in_graph({
file_names_name: "Const",
shared_file_name_queue_name: "FIFOQueue",
"%s/read/TextLineReader" % name: "TextLineReader",
example_queue_name: "FIFOQueue",
worker_file_name_queue_name: "FIFOQueue",
name: "QueueDequeueUpTo"
}, g)
self.assertAllEqual(session.run(inputs), [b"ABC"])
self.assertAllEqual(session.run(inputs), [b"DEF"])
self.assertAllEqual(session.run(inputs), [b"GHK"])
with self.assertRaises(errors.OutOfRangeError):
session.run(inputs)
coord.request_stop()
def _get_qr(self, name):
for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS):
if qr.name == name:
return qr
def _run_queue(self, name, session):
qr = self._get_qr(name)
for op in qr.enqueue_ops:
session.run(op)
def test_multiple_workers_with_shared_queue(self):
gfile.Glob = self._orig_glob
filenames = self._create_sorted_temp_files([
"ABC\n", "DEF\n", "GHI\n", "JKL\n", "MNO\n", "PQR\n", "STU\n", "VWX\n",
"YZ\n"
])
batch_size = 1
queue_capacity = 5
name = "my_batch"
shared_file_name_queue_name = "%s/file_name_queue" % name
example_queue_name = "%s/fifo_queue" % name
worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name
server = tf.train.Server.create_local_server()
with tf.Graph().as_default() as g1, tf.Session(
server.target, graph=g1) as session:
_, inputs = _read_keyed_batch_examples_shared_queue(
filenames,
batch_size,
reader=tf.TextLineReader,
randomize_input=False,
num_epochs=1,
queue_capacity=queue_capacity,
name=name)
session.run(tf.initialize_local_variables())
# Run the three queues once manually.
self._run_queue(shared_file_name_queue_name, session)
self._run_queue(worker_file_name_queue_name, session)
self._run_queue(example_queue_name, session)
self.assertAllEqual(session.run(inputs), [b"ABC"])
# Run the worker and the example queue.
self._run_queue(worker_file_name_queue_name, session)
self._run_queue(example_queue_name, session)
self.assertAllEqual(session.run(inputs), [b"DEF"])
with tf.Graph().as_default() as g2, tf.Session(
server.target, graph=g2) as session:
_, inputs = _read_keyed_batch_examples_shared_queue(
filenames,
batch_size,
reader=tf.TextLineReader,
randomize_input=False,
num_epochs=1,
queue_capacity=queue_capacity,
name=name)
# Run the worker and the example queue.
self._run_queue(worker_file_name_queue_name, session)
self._run_queue(example_queue_name, session)
self.assertAllEqual(session.run(inputs), [b"GHI"])
self.assertTrue(g1 is not g2)
def test_batch_text_lines(self):
gfile.Glob = self._orig_glob
filename = self._create_temp_file("A\nB\nC\nD\nE\n")

View File

@ -45,7 +45,7 @@ class IOTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(data),
n_classes=3)
classifier.fit(data, labels, steps=100)
score = accuracy_score(labels[0], classifier.predict(data))
score = accuracy_score(labels[0], list(classifier.predict(data)))
self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
else:
print("No pandas installed. pandas-related tests are skipped.")
@ -61,7 +61,7 @@ class IOTest(tf.test.TestCase):
feature_columns=learn.infer_real_valued_columns_from_input(data),
n_classes=3)
classifier.fit(data, labels, steps=100)
score = accuracy_score(labels, classifier.predict(data))
score = accuracy_score(labels, list(classifier.predict(data)))
self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_string_data_formats(self):

View File

@ -33,7 +33,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None):
predictions = nn.xw_plus_b(tensor_in, weights, biases)
if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2:
predictions = array_ops_.squeeze(predictions, squeeze_dims=[1])
return predictions, loss_ops.sum_of_squares(predictions, labels)
return predictions, loss_ops.mean_squared_error(predictions, labels)
def softmax_classifier(tensor_in,

View File

@ -1,20 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Sequence-to-sequence tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

View File

@ -1,100 +0,0 @@
# pylint: disable=g-bad-file-header
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for basic_session_run_hooks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import shutil
import tempfile
import time
import tensorflow as tf
from tensorflow.contrib import testing
from tensorflow.python.training import monitored_session
class StepCounterHookTest(tf.test.TestCase):
def setUp(self):
self.log_dir = tempfile.mkdtemp()
def tearDown(self):
shutil.rmtree(self.log_dir, ignore_errors=True)
def test_step_counter(self):
with tf.Graph().as_default() as g, tf.Session() as sess:
global_step = tf.contrib.framework.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)
summary_writer = testing.FakeSummaryWriter(self.log_dir, g)
hook = tf.train.StepCounterHook(
summary_writer=summary_writer, every_n_steps=10)
hook.begin()
sess.run(tf.initialize_all_variables())
mon_sess = monitored_session._HookedSession(sess, [hook])
for _ in range(30):
time.sleep(0.01)
mon_sess.run(train_op)
hook.end(sess)
summary_writer.assert_summaries(
test_case=self,
expected_logdir=self.log_dir,
expected_graph=g,
expected_summaries={})
for step in [11, 21]:
summary_value = summary_writer.summaries[step][0].value[0]
self.assertTrue(summary_value.tag, 'global_step/sec')
# check at least 10 steps per sec is recorded.
self.assertGreater(summary_value.simple_value, 10)
class SummarySaverHookTest(tf.test.TestCase):
def test_summary_saver(self):
with tf.Graph().as_default() as g, tf.Session() as sess:
log_dir = 'log/dir'
summary_writer = testing.FakeSummaryWriter(log_dir, g)
var = tf.Variable(0.0)
tensor = tf.assign_add(var, 1.0)
summary_op = tf.scalar_summary('my_summary', tensor)
global_step = tf.contrib.framework.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)
hook = tf.train.SummarySaverHook(
summary_op=summary_op, save_steps=8, summary_writer=summary_writer)
hook.begin()
sess.run(tf.initialize_all_variables())
mon_sess = monitored_session._HookedSession(sess, [hook])
for i in range(30):
_ = i
mon_sess.run(train_op)
hook.end(sess)
summary_writer.assert_summaries(
test_case=self,
expected_logdir=log_dir,
expected_graph=g,
expected_summaries={
1: {'my_summary': 1.0},
9: {'my_summary': 2.0},
17: {'my_summary': 3.0},
25: {'my_summary': 4.0},
})
if __name__ == '__main__':
tf.test.main()

View File

@ -1,78 +0,0 @@
# pylint: disable=g-bad-file-header
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Runner."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
import os
import tensorflow as tf
from tensorflow.contrib.learn.python.learn import summary_writer_cache
class SummaryWriterCacheTest(tf.test.TestCase):
"""SummaryWriterCache tests."""
def _test_dir(self, test_name):
"""Create an empty dir to use for tests.
Args:
test_name: Name of the test.
Returns:
Absolute path to the test directory.
"""
test_dir = os.path.join(self.get_temp_dir(), test_name)
if os.path.isdir(test_dir):
for f in glob.glob('%s/*' % test_dir):
os.remove(f)
else:
os.makedirs(test_dir)
return test_dir
def test_cache(self):
with tf.Graph().as_default():
dir1 = self._test_dir('test_cache_1')
dir2 = self._test_dir('test_cache_2')
sw1 = summary_writer_cache.SummaryWriterCache.get(dir1)
sw2 = summary_writer_cache.SummaryWriterCache.get(dir2)
sw3 = summary_writer_cache.SummaryWriterCache.get(dir1)
self.assertEqual(sw1, sw3)
self.assertFalse(sw1 == sw2)
sw1.close()
sw2.close()
events1 = glob.glob(os.path.join(dir1, 'event*'))
self.assertTrue(events1)
events2 = glob.glob(os.path.join(dir2, 'event*'))
self.assertTrue(events2)
events3 = glob.glob(os.path.join('nowriter', 'event*'))
self.assertFalse(events3)
def test_clear(self):
with tf.Graph().as_default():
dir1 = self._test_dir('test_clear')
sw1 = summary_writer_cache.SummaryWriterCache.get(dir1)
summary_writer_cache.SummaryWriterCache.clear()
sw2 = summary_writer_cache.SummaryWriterCache.get(dir1)
self.assertFalse(sw1 == sw2)
if __name__ == '__main__':
tf.test.main()

View File

@ -19,7 +19,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib import layers
from tensorflow.contrib.framework import deprecated
from tensorflow.contrib.framework import deprecated_arg_values
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
@ -312,21 +311,10 @@ def _export_estimator(estimator,
predictions)
else:
try:
# Some estimators provide a target_column of known type
target_column = estimator._get_target_column()
problem_type = target_column.problem_type
if problem_type == layers.ProblemType.CLASSIFICATION:
signature_fn = classification_signature_fn
elif problem_type == layers.ProblemType.LINEAR_REGRESSION:
signature_fn = regression_signature_fn
elif problem_type == layers.ProblemType.LOGISTIC_REGRESSION:
signature_fn = logistic_regression_signature_fn
else:
raise ValueError(
'signature_fn must be provided because the TargetColumn is a %s, '
'which does not have a standard problem type and so cannot use a '
'standard export signature.' % type(target_column).__name__)
# Some estimators provide a signature function.
# TODO(zakaria): check if the estimator has this function,
# raise a helpful error if not.
signature_fn = estimator._create_signature_fn()
default_signature, named_graph_signatures = (
signature_fn(examples, features, predictions))

View File

@ -47,6 +47,28 @@ class ExportTest(tf.test.TestCase):
default_signature = signatures.default_signature
return default_signature
def testExportMonitor_EstimatorProvidesSignature(self):
random.seed(42)
x = np.random.rand(1000)
y = 2 * x + 3
cont_features = [tf.contrib.layers.real_valued_column('', dimension=1)]
regressor = learn.LinearRegressor(feature_columns=cont_features)
export_dir = tempfile.mkdtemp() + 'export/'
export_monitor = learn.monitors.ExportMonitor(
every_n_steps=1, export_dir=export_dir, exports_to_keep=2)
regressor.fit(x, y, steps=10,
monitors=[export_monitor])
self.assertTrue(tf.gfile.Exists(export_dir))
# Only the written checkpoints are exported.
self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export'))
self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export'))
self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir,
'00000010'))
# Validate the signature
signature = self._get_default_signature(export_dir + '00000010/export.meta')
self.assertTrue(signature.HasField('regression_signature'))
def testExportMonitor(self):
random.seed(42)
x = np.random.rand(1000)

View File

@ -189,6 +189,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# The high tolerance in unregularized_loss comparisons is due to the
# fact that it's possible to trade off unregularized_loss vs.
# regularization and still have a sum that is quite close to the
@ -248,6 +249,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
for t in threads:
t.join()
lr.update_weights(train_op).run()
# The high tolerance in unregularized_loss comparisons is due to the
# fact that it's possible to trade off unregularized_loss vs.
@ -294,6 +296,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# There is neither L1 nor L2 loss, so regularized and unregularized
# losses should be exactly the same.
@ -346,6 +349,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
self.assertAllClose(0.525457, loss.eval(), atol=0.01)
@ -416,6 +420,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
self.assertAllClose(0.226487 + 0.102902,
unregularized_loss.eval(),
@ -456,6 +461,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08)
self.assertAllClose(0.408044, loss.eval(), atol=0.012)
@ -494,6 +500,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05)
self.assertAllClose(0.525457, loss.eval(), atol=0.01)
predicted_labels = get_binary_predictions_for_logistic(predictions)
@ -580,6 +587,7 @@ class SdcaWithLinearLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# Predictions should be 2/3 of label due to minimizing regularized loss:
# (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2
@ -626,6 +634,7 @@ class SdcaWithLinearLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# Predictions should be 1/5 of label due to minimizing regularized loss:
# (label - 2 * weight)^2 + L2 * 16 * weight^2
@ -661,6 +670,7 @@ class SdcaWithLinearLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# Predictions should be -4.0, 48/5 due to minimizing regularized loss:
# (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight
@ -696,6 +706,7 @@ class SdcaWithLinearLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# There are 4 (sparse) variable weights to be learned. 2 for age and 2 for
# gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1,
@ -729,6 +740,7 @@ class SdcaWithLinearLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# The loss function for these particular features is given by:
# 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). So,
@ -759,6 +771,7 @@ class SdcaWithLinearLossTest(SdcaModelTest):
train_op = lr.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
lr.update_weights(train_op).run()
# The loss function for these particular features is given by:
# 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
@ -816,6 +829,7 @@ class SdcaWithHingeLossTest(SdcaModelTest):
train_op = model.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
model.update_weights(train_op).run()
binary_predictions = get_binary_predictions_for_hinge(predictions)
self.assertAllEqual([-1.0, 1.0], predictions.eval())
@ -841,6 +855,7 @@ class SdcaWithHingeLossTest(SdcaModelTest):
train_op = model.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
model.update_weights(train_op).run()
self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05)
self.assertAllEqual([1, 0], binary_predictions.eval())
@ -871,6 +886,7 @@ class SdcaWithHingeLossTest(SdcaModelTest):
train_op = model.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
model.update_weights(train_op).run()
# (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints
# are within the margins so there is unregularized loss (1/2 per example).
@ -899,6 +915,7 @@ class SdcaWithHingeLossTest(SdcaModelTest):
train_op = model.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
model.update_weights(train_op).run()
# Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will
# try to increase the margin from (1.0, 0.5). Due to regularization,
@ -953,6 +970,7 @@ class SdcaWithSmoothHingeLossTest(SdcaModelTest):
train_op = model.minimize()
for _ in range(_MAX_ITERATIONS):
train_op.run()
model.update_weights(train_op).run()
binary_predictions = get_binary_predictions_for_hinge(predictions)
self.assertAllClose([-0.67, 0.67], predictions.eval(), atol=0.05)

View File

@ -278,7 +278,8 @@ class SdcaModel(object):
```python
# Create a solver with the desired parameters.
lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options)
opt_op = lr.minimize()
min_op = lr.minimize()
opt_op = lr.update_weights(min_op)
predictions = lr.predictions(examples)
# Primal loss + L1 loss + L2 loss.
@ -565,35 +566,46 @@ class SdcaModel(object):
for w, u in zip(self._slots['unshrinked_dense_features_weights'], dfw):
update_ops.append(w.assign_add(u))
with ops.control_dependencies(update_ops):
update_ops = []
# Copy over unshrinked weights to user provided variables.
for i, name in enumerate(
['sparse_features_weights', 'dense_features_weights']):
for var, slot_var in zip(self._variables[name],
self._slots['unshrinked_' + name]):
update_ops.append(var.assign(slot_var))
update_group = control_flow_ops.group(*update_ops)
# Apply proximal step.
with ops.control_dependencies([update_group]):
shrink_ops = []
for name in ['sparse_features_weights', 'dense_features_weights']:
for var in self._variables[name]:
with ops.device(var.device):
shrink_ops.append(
sdca_shrink_l1(
self._convert_n_to_tensor(
[var], as_ref=True),
l1=self._symmetric_l1_regularization(),
l2=self._symmetric_l2_regularization()))
shrink_l1 = control_flow_ops.group(*shrink_ops)
if not global_step:
return shrink_l1
with ops.control_dependencies([shrink_l1]):
return control_flow_ops.group(*update_ops)
with ops.control_dependencies(update_ops):
return state_ops.assign_add(global_step, 1, name=name).op
def update_weights(self, train_op):
"""Updates the model weights.
This function must be called on at least one worker after `minimize`.
In distributed training this call can be omitted on non-chief workers to
speed up training.
Args:
train_op: The operation returned by the `minimize` call.
Returns:
An Operation that updates the model weights.
"""
with ops.control_dependencies([train_op]):
update_ops = []
# Copy over unshrinked weights to user provided variables.
for name in ['sparse_features_weights', 'dense_features_weights']:
for var, slot_var in zip(self._variables[name],
self._slots['unshrinked_' + name]):
update_ops.append(var.assign(slot_var))
# Apply proximal step.
with ops.control_dependencies(update_ops):
update_ops = []
for name in ['sparse_features_weights', 'dense_features_weights']:
for var in self._variables[name]:
with ops.device(var.device):
update_ops.append(
sdca_shrink_l1(
self._convert_n_to_tensor(
[var], as_ref=True),
l1=self._symmetric_l1_regularization(),
l2=self._symmetric_l2_regularization()))
return control_flow_ops.group(*update_ops)
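Putting `minimize` and `update_weights` together, the training loop the tests above follow looks like this sketch (construction of `examples`, `variables`, and `options` is elided; the iteration count is illustrative):

```python
lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options)
train_op = lr.minimize()
update_op = lr.update_weights(train_op)  # build once, outside the loop

with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())
  for _ in range(10):  # e.g. _MAX_ITERATIONS in the tests above
    sess.run(train_op)
  # Copy the unshrunk weights into the user variables and apply the L1 step.
  sess.run(update_op)
```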
def approximate_duality_gap(self):
"""Add operations to compute the approximate duality gap.

View File

@ -49,7 +49,7 @@ class SDCAOptimizer(object):
as `key` whose value is a `Tensor` of shape [batch_size] and dtype string.
num_loss_partitions defines the number of partitions of the global loss
function and should be set to (#concurrent train ops per worker) x (#workers).
Convergence of (global) loss is guranteed if num_loss_partitions is larger or
Convergence of (global) loss is guaranteed if num_loss_partitions is larger or
equal to the above product. Larger values for num_loss_partitions lead to
slower convergence. The recommended value for num_loss_partitions in tf.learn
(where currently there is one process per worker) is the number of workers
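As a concrete, hypothetical instance of that product:

```python
# Hypothetical cluster: 4 workers, each running a single concurrent train op.
num_workers = 4
concurrent_train_ops_per_worker = 1
num_loss_partitions = concurrent_train_ops_per_worker * num_workers  # == 4
```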
@ -181,4 +181,5 @@ class SDCAOptimizer(object):
num_loss_partitions=self._num_loss_partitions,
num_table_shards=self._num_table_shards,
loss_type=loss_type))
return sdca_model.minimize(global_step=global_step)
train_op = sdca_model.minimize(global_step=global_step)
return sdca_model, train_op

View File

@ -21,7 +21,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.framework import deprecated
from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
@ -43,9 +42,7 @@ __all__ = ["absolute_difference",
"mean_squared_error",
"sigmoid_cross_entropy",
"softmax_cross_entropy",
"sparse_softmax_cross_entropy",
"sum_of_pairwise_squares",
"sum_of_squares"]
"sparse_softmax_cross_entropy"]
def _scale_losses(losses, weight):
@ -486,8 +483,7 @@ def hinge_loss(logits, target, scope=None):
return losses
@deprecated("2016-10-01", "Use mean_squared_error.")
def sum_of_squares(predictions, targets, weight=1.0, scope=None):
def mean_squared_error(predictions, targets, weight=1.0, scope=None):
"""Adds a Sum-of-Squares loss to the training procedure.
`weight` acts as a coefficient for the loss. If a scalar is provided, then the
@ -512,7 +508,7 @@ def sum_of_squares(predictions, targets, weight=1.0, scope=None):
ValueError: If the shape of `predictions` doesn't match that of `targets` or
if the shape of `weight` is invalid.
"""
with ops.name_scope(scope, "sum_of_squares_loss",
with ops.name_scope(scope, "mean_squared_error",
[predictions, targets]) as scope:
predictions.get_shape().assert_is_compatible_with(targets.get_shape())
if weight is None:
@ -523,17 +519,13 @@ def sum_of_squares(predictions, targets, weight=1.0, scope=None):
return compute_weighted_loss(losses, weight)
mean_squared_error = sum_of_squares
@deprecated("2016-10-01", "Use mean_pairwise_squared_error.")
def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None):
def mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None):
"""Adds a pairwise-errors-squared loss to the training procedure.
Unlike the sum_of_squares loss, which is a measure of the differences between
corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares
is a measure of the differences between pairs of corresponding elements of
`predictions` and `targets`.
Unlike `mean_squared_error`, which is a measure of the differences between
corresponding elements of `predictions` and `targets`,
`mean_pairwise_squared_error` is a measure of the differences between pairs of
corresponding elements of `predictions` and `targets`.
For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are
three pairs of differences are summed to compute the loss:
@ -566,7 +558,7 @@ def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None):
ValueError: If the shape of `predictions` doesn't match that of `targets` or
if the shape of `weight` is invalid.
"""
with ops.name_scope(scope, "sum_of_pairwise_squares_loss",
with ops.name_scope(scope, "mean_pairwise_squared_error",
[predictions, targets]) as scope:
predictions.get_shape().assert_is_compatible_with(targets.get_shape())
if weight is None:
@ -607,9 +599,6 @@ def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None):
return mean_loss
mean_pairwise_squared_error = sum_of_pairwise_squares
def cosine_distance(predictions, targets, dim, weight=1.0, scope=None):
"""Adds a cosine-distance loss to the training procedure.

View File

@ -779,12 +779,6 @@ class MeanSquaredErrorTest(tf.test.TestCase):
self._predictions = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3))
self._targets = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3))
def testDeprecatedName(self):
loss = tf.contrib.losses.sum_of_squares(
self._predictions, self._predictions)
with self.test_session():
self.assertAlmostEqual(0.0, loss.eval(), 3)
def testValueErrorThrownWhenWeightIsNone(self):
with self.test_session():
with self.assertRaises(ValueError):
@ -875,13 +869,6 @@ class MeanPairwiseSquaresErrorTest(tf.test.TestCase):
self._expected_losses = np.divide(total, 9.0)
def testDeprecatedName(self):
loss = tf.contrib.losses.sum_of_pairwise_squares(
predictions=tf.constant(self._predictions),
targets=tf.constant(self._targets))
with self.test_session():
self.assertAlmostEqual(np.sum(self._expected_losses), loss.eval(), 3)
def testValueErrorThrownWhenWeightIsNone(self):
with self.test_session():
with self.assertRaises(ValueError):

View File

@ -136,7 +136,7 @@ $(shell mkdir -p $(DEPDIR) >/dev/null)
# Settings for the target compiler.
CXX := $(CC_PREFIX) gcc
OPTFLAGS := -O0
OPTFLAGS := -O2
CXXFLAGS := --std=c++11 -DIS_SLIM_BUILD -fno-exceptions -DNDEBUG $(OPTFLAGS)
LDFLAGS := \
-L/usr/local/lib
@ -229,6 +229,8 @@ ifeq ($(TARGET),ANDROID)
--sysroot $(NDK_ROOT)/platforms/android-21/arch-arm \
-Wno-narrowing \
-march=armv7-a \
-mfloat-abi=softfp \
-mfpu=neon \
-fPIE
INCLUDES = \

View File

@ -66,6 +66,7 @@ if [[ "${USE_HEXAGON}" == "true" ]]; then
HEXAGON_INCLUDE="${HEXAGON_PARENT_DIR}/include"
fi
# Recommend make -j<#jobs> e.g. -j8 to speed up build on multi-core machine
if [[ -z "${BUILD_TARGET}" ]]; then
make -f tensorflow/contrib/makefile/Makefile \
TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \

View File

@ -33,27 +33,34 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32,
Calculate the Confusion Matrix for a pair of prediction and
label 1-D int arrays.
Considering a prediction array such as: `[1, 2, 3]`
And a label array such as: `[2, 2, 3]`
The confusion matrix returned would be the following one:
```python
[[0, 0, 0]
[0, 1, 0]
[0, 1, 0]
[0, 0, 1]]
```
If `weights` is not None, then the confusion matrix elements are the
corresponding `weights` elements.
Where the matrix rows represent the prediction labels and the columns
The matrix rows represent the prediction labels and the columns
represent the real labels. The confusion matrix is always a 2-D array
of shape [n, n], where n is the number of valid labels for a given
of shape `[n, n]`, where `n` is the number of valid labels for a given
classification task. Both prediction and labels must be 1-D arrays of
the same shape in order for this function to work.
If `num_classes` is `None`, then `num_classes` will be set to one plus the
maximum value in either predictions or labels.
Class labels are expected to start at 0. E.g., if `num_classes` was
three, then the possible labels would be `[0, 1, 2]`.
If `weights` is not `None`, then each prediction contributes its
corresponding weight to the total value of the confusion matrix cell.
For example:
```python
tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==>
[[0 0 0 0 0]
[0 0 1 0 0]
[0 0 1 0 0]
[0 0 0 0 0]
[0 0 0 0 1]]
```
Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`,
resulting in a 5x5 confusion matrix.
Args:
predictions: A 1-D array representing the predictions for a given
classification.

View File

@ -22,6 +22,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.framework import deprecated
from tensorflow.contrib.framework import deprecated_args
from tensorflow.contrib.framework import tensor_util
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
@ -113,13 +114,15 @@ def _safe_scalar_div(numerator, denominator, name):
name=name)
def _create_local(name, shape=None, collections=None, dtype=dtypes.float32):
def _create_local(name, shape, collections=None, validate_shape=True,
dtype=dtypes.float32):
"""Creates a new local variable.
Args:
name: The name of the new or existing variable.
shape: Shape of the new or existing variable.
collections: A list of collection names to which the Variable will be added.
validate_shape: Whether to validate the shape of the variable.
dtype: Data type of the variables.
Returns:
@ -132,7 +135,8 @@ def _create_local(name, shape=None, collections=None, dtype=dtypes.float32):
initial_value=array_ops.zeros(shape, dtype=dtype),
name=name,
trainable=False,
collections=collections)
collections=collections,
validate_shape=validate_shape)
def _count_condition(values, weights=None, metrics_collections=None,
@ -1225,6 +1229,8 @@ def _at_k_name(name, k, class_id=None):
return name
@deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, '
'and reshape labels from [batch_size] to [batch_size, 1].')
@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask')
def streaming_recall_at_k(predictions, labels, k, ignore_mask=None,
weights=None, metrics_collections=None,
@ -1328,7 +1334,7 @@ def streaming_sparse_recall_at_k(predictions,
labels: `int64` `Tensor` or `SparseTensor` with shape
[D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
target classes for the associated prediction. Commonly, N=1 and `labels`
has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`.
has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`.
Values should be in range [0, num_classes], where num_classes is the last
dimension of `predictions`.
k: Integer, k for @k metric.
@ -1429,7 +1435,7 @@ def streaming_sparse_precision_at_k(predictions,
[D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
target classes for the associated prediction. Commonly, N=1 and `labels`
has shape [batch_size, num_labels]. [D1, ... DN] must match
`predictions_idx`. Values should be in range [0, num_classes], where
`predictions`. Values should be in range [0, num_classes], where
num_classes is the last dimension of `predictions`.
k: Integer, k for @k metric.
class_id: Integer class ID for which we want binary metrics. This should be
@ -1596,7 +1602,7 @@ def sparse_average_precision_at_k(predictions, labels, k):
[D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
target classes for the associated prediction. Commonly, N=1 and `labels`
has shape [batch_size, num_labels]. [D1, ... DN] must match
`predictions_idx`. Values should be in range [0, num_classes], where
`predictions`. Values should be in range [0, num_classes], where
num_classes is the last dimension of `predictions`.
k: Integer, k for @k metric. This will calculate an average precision for
range `[1,k]`, as documented above.
@ -1698,7 +1704,7 @@ def streaming_sparse_average_precision_at_k(predictions,
[D1, ... DN, num_labels], where N >= 1 and num_labels is the number of
target classes for the associated prediction. Commonly, N=1 and `labels`
has shape [batch_size, num_labels]. [D1, ... DN] must match
`predictions_idx`. Values should be in range [0, num_classes], where
`predictions`. Values should be in range [0, num_classes], where
num_classes is the last dimension of `predictions`.
k: Integer, k for @k metric. This will calculate an average precision for
range `[1,k]`, as documented above.
@ -1770,9 +1776,8 @@ def _select_class_id(ids, selected_id):
selected_id: Int id to select.
Returns:
`SparseTensor` of same dimensions as `ids`, except for the last dimension,
which might be smaller. This contains only the entries equal to
`selected_id`.
`SparseTensor` of same dimensions as `ids`. This contains only the entries
equal to `selected_id`.
"""
if isinstance(ids, (ops.SparseTensor, ops.SparseTensorValue)):
return sparse_ops.sparse_retain(
@ -1782,7 +1787,7 @@ def _select_class_id(ids, selected_id):
# tf.equal and tf.reduce_any?
# Shape of filled IDs is the same as `ids` with the last dim collapsed to 1.
ids_shape = array_ops.shape(ids)
ids_shape = array_ops.shape(ids, out_type=dtypes.int64)
ids_last_dim = array_ops.size(ids_shape) - 1
filled_selected_id_shape = math_ops.reduced_shape(
ids_shape, array_ops.reshape(ids_last_dim, [1]))
@ -1790,7 +1795,9 @@ def _select_class_id(ids, selected_id):
# Intersect `ids` with the selected ID.
filled_selected_id = array_ops.fill(
filled_selected_id_shape, math_ops.to_int64(selected_id))
return set_ops.set_intersection(filled_selected_id, ids)
result = set_ops.set_intersection(filled_selected_id, ids)
return ops.SparseTensor(
indices=result.indices, values=result.values, shape=ids_shape)
def _maybe_select_class_id(labels, predictions_idx, selected_id=None):
@ -2827,7 +2834,8 @@ def streaming_concat(values,
# applied to contiguous slices
init_size = 0 if max_size is None else max_size
init_shape = [init_size] + fixed_shape
array = _create_local('array', shape=init_shape, dtype=values.dtype)
array = _create_local(
'array', shape=init_shape, validate_shape=False, dtype=values.dtype)
size = _create_local('size', shape=[], dtype=dtypes.int32)
perm = [0 if n == axis else n + 1 if n < axis else n for n in range(ndim)]
@ -2900,6 +2908,7 @@ def aggregate_metric_map(names_to_tuples):
This function is useful for pairing metric names with their associated value
and update ops when the list of metrics is long. For example:
```python
metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({
'Mean Absolute Error': new_slim.metrics.streaming_mean_absolute_error(
predictions, labels, weights),
@ -2910,6 +2919,7 @@ def aggregate_metric_map(names_to_tuples):
'RMSE Log': new_slim.metrics.streaming_root_mean_squared_error(
predictions, labels, weights),
})
```
Args:
names_to_tuples: a map of metric names to tuples, each of which contain the

View File

@ -132,6 +132,10 @@ def _binary_3d_label_to_sparse(labels):
return tf.SparseTensor.from_value(_binary_3d_label_to_sparse_value(labels))
def _assert_nan(test_case, actual):
test_case.assertTrue(math.isnan(actual), 'Expected NAN, got %s.' % actual)
class StreamingMeanTest(tf.test.TestCase):
def setUp(self):
@ -1603,6 +1607,9 @@ class StreamingPrecisionRecallThresholdsTest(tf.test.TestCase):
self.assertAlmostEqual(expected_rec, rec.eval(), 2)
# TODO(ptucker): Remove when we remove `streaming_recall_at_k`.
# This op will be deprecated soon in favor of `streaming_sparse_recall_at_k`.
# Until then, this test validates that both ops yield the same results.
class StreamingRecallAtKTest(tf.test.TestCase):
def setUp(self):
@ -1639,57 +1646,78 @@ class StreamingRecallAtKTest(tf.test.TestCase):
predictions = tf.constant(self._np_predictions,
shape=(self._batch_size, self._num_classes),
dtype=tf.float32)
labels = tf.constant(self._np_labels, shape=(self._batch_size,))
labels = tf.constant(
self._np_labels, shape=(self._batch_size,), dtype=tf.int64)
recall, update_op = metrics.streaming_recall_at_k(
predictions, labels, k=1)
sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k(
predictions, tf.reshape(labels, (self._batch_size, 1)), k=1)
with self.test_session() as sess:
sess.run(tf.initialize_local_variables())
self.assertEqual(0.25, sess.run(update_op))
self.assertEqual(0.25, recall.eval())
self.assertEqual(0.25, sess.run(sp_update_op))
self.assertEqual(0.25, sp_recall.eval())
def testSingleUpdateKIs2(self):
predictions = tf.constant(self._np_predictions,
shape=(self._batch_size, self._num_classes),
dtype=tf.float32)
labels = tf.constant(self._np_labels, shape=(self._batch_size,))
labels = tf.constant(
self._np_labels, shape=(self._batch_size,), dtype=tf.int64)
recall, update_op = metrics.streaming_recall_at_k(
predictions, labels, k=2)
sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k(
predictions, tf.reshape(labels, (self._batch_size, 1)), k=2)
with self.test_session() as sess:
sess.run(tf.initialize_local_variables())
self.assertEqual(0.5, sess.run(update_op))
self.assertEqual(0.5, recall.eval())
self.assertEqual(0.5, sess.run(sp_update_op))
self.assertEqual(0.5, sp_recall.eval())
def testSingleUpdateKIs3(self):
predictions = tf.constant(self._np_predictions,
shape=(self._batch_size, self._num_classes),
dtype=tf.float32)
labels = tf.constant(self._np_labels, shape=(self._batch_size,))
labels = tf.constant(
self._np_labels, shape=(self._batch_size,), dtype=tf.int64)
recall, update_op = metrics.streaming_recall_at_k(
predictions, labels, k=3)
sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k(
predictions, tf.reshape(labels, (self._batch_size, 1)), k=3)
with self.test_session() as sess:
sess.run(tf.initialize_local_variables())
self.assertEqual(1.0, sess.run(update_op))
self.assertEqual(1.0, recall.eval())
self.assertEqual(1.0, sess.run(sp_update_op))
self.assertEqual(1.0, sp_recall.eval())
def testSingleUpdateSomeMissingKIs2(self):
predictions = tf.constant(self._np_predictions,
shape=(self._batch_size, self._num_classes),
dtype=tf.float32)
labels = tf.constant(self._np_labels, shape=(self._batch_size,))
labels = tf.constant(
self._np_labels, shape=(self._batch_size,), dtype=tf.int64)
weights = tf.constant([0, 1, 1, 1], shape=(self._batch_size,),
dtype=tf.float32)
mask = tf.constant([False, False, True, False], shape=(self._batch_size,),
dtype=tf.bool)
recall, update_op = metrics.streaming_recall_at_k(
predictions, labels, k=2, ignore_mask=mask, weights=weights)
sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k(
predictions, tf.reshape(labels, (self._batch_size, 1)), k=2,
ignore_mask=mask, weights=weights)
with self.test_session() as sess:
sess.run(tf.initialize_local_variables())
self.assertEqual(1.0, sess.run(update_op))
self.assertEqual(1.0, recall.eval())
self.assertEqual(1.0, sess.run(sp_update_op))
self.assertEqual(1.0, sp_recall.eval())
class StreamingSparsePrecisionTest(tf.test.TestCase):
@ -1718,8 +1746,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
# Run per-step op and assert expected values.
if math.isnan(expected):
self.assertTrue(math.isnan(update.eval()))
self.assertTrue(math.isnan(metric.eval()))
_assert_nan(self, update.eval())
_assert_nan(self, metric.eval())
else:
self.assertEqual(expected, update.eval())
self.assertEqual(expected, metric.eval())
@ -1735,7 +1763,7 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
ignore_mask = tf.constant(ignore_mask, tf.bool)
predictions = tf.constant(predictions, tf.float32)
metric = metric_ops.sparse_average_precision_at_k(
predictions=predictions, labels=labels, k=k)
predictions, labels, k)
self.assertAllEqual(expected, metric.eval())
def _test_streaming_sparse_average_precision_at_k(
@ -1745,7 +1773,7 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
weights = tf.constant(weights, tf.float32)
predictions = tf.constant(predictions, tf.float32)
metric, update = metrics.streaming_sparse_average_precision_at_k(
predictions=predictions, labels=labels, k=k, weights=weights)
predictions, labels, k, weights=weights)
# Fails without initialized vars.
self.assertRaises(tf.OpError, metric.eval)
@ -1755,8 +1783,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
# Run per-step op and assert expected values.
if math.isnan(expected):
self.assertTrue(math.isnan(update.eval()))
self.assertTrue(math.isnan(metric.eval()))
_assert_nan(self, update.eval())
_assert_nan(self, metric.eval())
else:
self.assertAlmostEqual(expected, update.eval())
self.assertAlmostEqual(expected, metric.eval())
@ -1849,89 +1877,97 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
predictions, labels, k, expected=streaming_average_precision[i],
weights=weights)
def test_one_label_at_k1_no_predictions(self):
def test_one_label_at_k1_nan(self):
predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [[0, 0, 0, 1], [0, 0, 1, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
sparse_labels = _binary_2d_label_to_sparse_value(
[[0, 0, 0, 1], [0, 0, 1, 0]])
dense_labels = np.array([[3], [2]], dtype=np.int64)
# Classes 0,1,2 have 0 predictions, class 4 is out of range.
for class_id in [0, 1, 2, 4]:
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=1, expected=NAN, class_id=class_id)
for labels in (sparse_labels, dense_labels):
# Classes 0,1,2 have 0 predictions, class 4 is out of range.
for class_id in (0, 1, 2, 4):
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=1, expected=NAN, class_id=class_id)
def test_one_label_at_k1(self):
predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [[0, 0, 0, 1], [0, 0, 1, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
sparse_labels = _binary_2d_label_to_sparse_value(
[[0, 0, 0, 1], [0, 0, 1, 0]])
dense_labels = np.array([[3], [2]], dtype=np.int64)
# Class 3: 1 label, 2 predictions, 1 correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=1, expected=1.0 / 2, class_id=3)
for labels in (sparse_labels, dense_labels):
# Class 3: 1 label, 2 predictions, 1 correct.
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=1, expected=1.0 / 2, class_id=3)
# All classes: 2 labels, 2 predictions, 1 correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=1, expected=1.0 / 2)
# All classes: 2 labels, 2 predictions, 1 correct.
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=1, expected=1.0 / 2)
def test_three_labels_at_k5_no_predictions(self):
predictions = [
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]
]
labels = [
sparse_labels = _binary_2d_label_to_sparse_value([
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
]
sp_labels = _binary_2d_label_to_sparse_value(labels)
])
dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64)
# Classes 1,3,8 have 0 predictions, class 10 is out of range.
for class_id in [1, 3, 8, 10]:
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id)
for labels in (sparse_labels, dense_labels):
# Classes 1,3,8 have 0 predictions, class 10 is out of range.
for class_id in (1, 3, 8, 10):
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=5, expected=NAN, class_id=class_id)
def test_three_labels_at_k5_no_labels(self):
predictions = [
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]
]
labels = [
sparse_labels = _binary_2d_label_to_sparse_value([
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
]
sp_labels = _binary_2d_label_to_sparse_value(labels)
])
dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64)
# Classes 0,4,6,9: 0 labels, >=1 prediction.
for class_id in [0, 4, 6, 9]:
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=0.0, class_id=class_id)
for labels in (sparse_labels, dense_labels):
# Classes 0,4,6,9: 0 labels, >=1 prediction.
for class_id in (0, 4, 6, 9):
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=5, expected=0.0, class_id=class_id)
def test_three_labels_at_k5(self):
predictions = [
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]
]
labels = [
sparse_labels = _binary_2d_label_to_sparse_value([
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
]
sp_labels = _binary_2d_label_to_sparse_value(labels)
])
dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64)
# Class 2: 2 labels, 2 correct predictions.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2, class_id=2)
for labels in (sparse_labels, dense_labels):
# Class 2: 2 labels, 2 correct predictions.
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=5, expected=2.0 / 2,
class_id=2)
# Class 5: 1 label, 1 correct prediction.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=1.0 / 1, class_id=5)
# Class 5: 1 label, 1 correct prediction.
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=5, expected=1.0 / 1, class_id=5)
# Class 7: 1 label, 1 incorrect prediction.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=0.0 / 1, class_id=7)
# Class 7: 1 label, 1 incorrect prediction.
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=5, expected=0.0 / 1, class_id=7)
# All classes: 10 predictions, 3 correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=3.0 / 10)
# All classes: 10 predictions, 3 correct.
self._test_streaming_sparse_precision_at_k(
predictions, labels, k=5, expected=3.0 / 10)
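The all-classes figure 3.0 / 10 follows from the top-5 sets of the two rows: row 0 ranks classes {9, 4, 6, 2, 0} against labels {2, 7, 8} (one hit), and row 1 ranks {5, 7, 2, 9, 6} against labels {1, 2, 5} (two hits), giving 3 correct out of 2 * 5 predictions. A quick NumPy check:

import numpy as np

predictions = np.array([
    [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
    [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]])
labels = [{2, 7, 8}, {1, 2, 5}]
top_5 = np.argsort(-predictions, axis=1)[:, :5]
hits = sum(len(set(row) & labels[i]) for i, row in enumerate(top_5))
print(hits / 10.0)  # 0.3 == 3.0 / 10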
def test_3d_no_predictions(self):
def test_3d_nan(self):
predictions = [[
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]
@ -1939,19 +1975,18 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
# Classes 1,3,8 have 0 predictions, class 10 is out of range.
for class_id in [1, 3, 8, 10]:
for class_id in (1, 3, 8, 10):
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id)
predictions, labels, k=5, expected=NAN, class_id=class_id)
def test_3d_no_labels(self):
predictions = [[
@ -1961,19 +1996,18 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
# Classes 0,4,6,9: 0 labels, >=1 prediction.
for class_id in [0, 4, 6, 9]:
for class_id in (0, 4, 6, 9):
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=0.0, class_id=class_id)
predictions, labels, k=5, expected=0.0, class_id=class_id)
def test_3d(self):
predictions = [[
@ -1983,30 +2017,29 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
# Class 2: 4 predictions, all correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=4.0 / 4, class_id=2)
predictions, labels, k=5, expected=4.0 / 4, class_id=2)
# Class 5: 2 predictions, both correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2, class_id=5)
predictions, labels, k=5, expected=2.0 / 2, class_id=5)
# Class 7: 2 predictions, 1 correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=1.0 / 2, class_id=7)
predictions, labels, k=5, expected=1.0 / 2, class_id=7)
# All classes: 20 predictions, 7 correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=7.0 / 20)
predictions, labels, k=5, expected=7.0 / 20)
def test_3d_ignore_all(self):
predictions = [[
@ -2016,28 +2049,26 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
for class_id in xrange(10):
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id,
predictions, labels, k=5, expected=NAN, class_id=class_id,
weights=[[0], [0]])
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id,
predictions, labels, k=5, expected=NAN, class_id=class_id,
weights=[[0, 0], [0, 0]])
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN,
ignore_mask=[[False], [True]], weights=[[0], [1]])
predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]],
weights=[[0], [1]])
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN,
weights=[[0, 0], [0, 0]])
predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]])
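Every case above expects NAN because all-zero weights (or a fully ignored batch) contribute nothing to either accumulator, leaving a 0/0 division. A one-line sketch of the guard these tests rely on (an assumed shape for it, not the library's actual helper):

def _safe_div(numerator, denominator):
  # 0/0 surfaces as NaN rather than raising, which is what expected=NAN checks.
  return numerator / denominator if denominator else float('nan')

print(_safe_div(0.0, 0.0))  # nan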
def test_3d_ignore_some(self):
predictions = [[
@ -2047,43 +2078,42 @@ class StreamingSparsePrecisionTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
# Class 2: 2 predictions, both correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2,
predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
ignore_mask=[[False], [False]], weights=[[1], [0]])
# Class 2: 2 predictions, both correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2,
predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
ignore_mask=[[False], [False]], weights=[[0], [1]])
# Class 7: 1 incorrect prediction.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=0.0 / 1.0, class_id=7,
predictions, labels, k=5, expected=0.0 / 1.0, class_id=7,
ignore_mask=[[False], [True]], weights=[[1], [1]])
# Class 7: 1 correct prediction.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=1.0 / 1.0, class_id=7,
predictions, labels, k=5, expected=1.0 / 1.0, class_id=7,
ignore_mask=[[True], [False]], weights=[[1], [1]])
# Class 7: no predictions.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=7,
predictions, labels, k=5, expected=NAN, class_id=7,
weights=[[1, 0], [0, 1]])
# Class 7: 2 predictions, 1 correct.
self._test_streaming_sparse_precision_at_k(
predictions, sp_labels, k=5, expected=1.0 / 2.0, class_id=7,
predictions, labels, k=5, expected=1.0 / 2.0, class_id=7,
weights=[[0, 1], [1, 0]])
def test_sparse_tensor_value(self):
@ -2127,177 +2157,172 @@ class StreamingSparseRecallTest(tf.test.TestCase):
# Run per-step op and assert expected values.
if math.isnan(expected):
self.assertTrue(math.isnan(update.eval()))
self.assertTrue(math.isnan(metric.eval()))
_assert_nan(self, update.eval())
_assert_nan(self, metric.eval())
else:
self.assertEqual(expected, update.eval())
self.assertEqual(expected, metric.eval())
def test_one_label_at_k1_empty_classes(self):
def test_one_label_at_k1_nan(self):
predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [[0, 0, 0, 1], [0, 0, 1, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
sparse_labels = _binary_2d_label_to_sparse_value(
[[0, 0, 0, 1], [0, 0, 1, 0]])
dense_labels = np.array([[3], [2]], dtype=np.int64)
# Classes 0,1 have 0 labels, 0 predictions, class 4 is out of range.
for class_id in [0, 1, 4]:
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=NAN,
class_id=class_id)
for labels in (sparse_labels, dense_labels):
for class_id in (0, 1, 4):
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=NAN,
class_id=class_id)
def test_one_label_at_k1_no_predictions(self):
predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [[0, 0, 0, 1], [0, 0, 1, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
sparse_labels = _binary_2d_label_to_sparse_value(
[[0, 0, 0, 1], [0, 0, 1, 0]])
dense_labels = np.array([[3], [2]], dtype=np.int64)
# Class 2: 0 predictions.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=0.0,
class_id=2)
for labels in (sparse_labels, dense_labels):
# Class 2: 0 predictions.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=0.0,
class_id=2)
def test_one_label_at_k1(self):
predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [[0, 0, 0, 1], [0, 0, 1, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
sparse_labels = _binary_2d_label_to_sparse_value(
[[0, 0, 0, 1], [0, 0, 1, 0]])
dense_labels = np.array([[3], [2]], dtype=np.int64)
# Class 3: 1 label, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1,
class_id=3)
for labels in (sparse_labels, dense_labels):
# Class 3: 1 label, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 1,
class_id=3)
# All classes: 2 labels, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2)
# All classes: 2 labels, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 2)
def test_one_label_at_k1_weighted(self):
predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]]
labels = [[0, 0, 0, 1], [0, 0, 1, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
sparse_labels = _binary_2d_label_to_sparse_value(
[[0, 0, 0, 1], [0, 0, 1, 0]])
dense_labels = np.array([[3], [2]], dtype=np.int64)
# Class 3: 1 label, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=NAN,
class_id=3, weights=(0.0,))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1,
class_id=3, weights=(1.0,))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1,
class_id=3, weights=(2.0,))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=NAN,
class_id=3, weights=(0.0, 0.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=NAN,
class_id=3, weights=(0.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1,
class_id=3, weights=(1.0, 0.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1,
class_id=3, weights=(1.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=2.0 / 2,
class_id=3, weights=(2.0, 3.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=3.0 / 3,
class_id=3, weights=(3.0, 2.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=0.3 / 0.3,
class_id=3, weights=(0.3, 0.6))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=0.6 / 0.6,
class_id=3, weights=(0.6, 0.3))
for labels in (sparse_labels, dense_labels):
# Class 3: 1 label, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=NAN, class_id=3, weights=(0.0,))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 1, class_id=3,
weights=(1.0,))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 1, class_id=3,
weights=(2.0,))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=NAN, class_id=3,
weights=(0.0, 0.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=NAN, class_id=3,
weights=(0.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 1, class_id=3,
weights=(1.0, 0.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 1, class_id=3,
weights=(1.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=2.0 / 2, class_id=3,
weights=(2.0, 3.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=3.0 / 3, class_id=3,
weights=(3.0, 2.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=0.3 / 0.3, class_id=3,
weights=(0.3, 0.6))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=0.6 / 0.6, class_id=3,
weights=(0.6, 0.3))
# All classes: 2 labels, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=NAN,
weights=(0.0,))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2,
weights=(1.0,))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2,
weights=(2.0,))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1,
weights=(1.0, 0.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=0.0 / 1,
weights=(0.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2,
weights=(1.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=2.0 / 5,
weights=(2.0, 3.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=3.0 / 5,
weights=(3.0, 2.0))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=0.3 / 0.9,
weights=(0.3, 0.6))
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=1, expected=0.6 / 0.9,
weights=(0.6, 0.3))
# All classes: 2 labels, 2 predictions, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=NAN, weights=(0.0,))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 2, weights=(1.0,))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 2, weights=(2.0,))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 1, weights=(1.0, 0.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=0.0 / 1, weights=(0.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=1.0 / 2, weights=(1.0, 1.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=2.0 / 5, weights=(2.0, 3.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=3.0 / 5, weights=(3.0, 2.0))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=0.3 / 0.9, weights=(0.3, 0.6))
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=1, expected=0.6 / 0.9, weights=(0.6, 0.3))
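The weighted all-classes cases reduce to sum(w * tp) / sum(w * labels) per example. For instance, with weights (2.0, 3.0): row 0's single label (class 3) is the top-1 prediction and row 1's (class 2) is not, so the expected value is 2.0 / 5. Checking that arithmetic directly:

weights = (2.0, 3.0)
true_positives = (1, 0)  # row 0 hit, row 1 miss; each row has exactly 1 label
recall = sum(w * tp for w, tp in zip(weights, true_positives)) / sum(weights)
print(recall)            # 0.4 == 2.0 / 5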
def test_three_labels_at_k5_no_labels(self):
def test_three_labels_at_k5_nan(self):
predictions = [
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]]
labels = [
sparse_labels = _binary_2d_label_to_sparse_value([
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]])
dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64)
# Classes 0,3,4,6,9 have 0 labels, class 10 is out of range.
for class_id in [0, 3, 4, 6, 9, 10]:
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=5, expected=NAN,
class_id=class_id)
for labels in (sparse_labels, dense_labels):
# Classes 0,3,4,6,9 have 0 labels, class 10 is out of range.
for class_id in (0, 3, 4, 6, 9, 10):
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=NAN, class_id=class_id)
def test_three_labels_at_k5_no_predictions(self):
predictions = [
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]]
labels = [
sparse_labels = _binary_2d_label_to_sparse_value([
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]])
dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64)
# Class 8: 1 label, no predictions.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1,
class_id=8)
for labels in (sparse_labels, dense_labels):
# Class 8: 1 label, no predictions.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=0.0 / 1, class_id=8)
def test_three_labels_at_k5(self):
predictions = [
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]]
labels = [
sparse_labels = _binary_2d_label_to_sparse_value([
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]
sp_labels = _binary_2d_label_to_sparse_value(labels)
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]])
dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64)
# Class 2: 2 labels, both correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=5, expected=2.0 / 2,
class_id=2)
for labels in (sparse_labels, dense_labels):
# Class 2: 2 labels, both correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=2.0 / 2, class_id=2)
# Class 5: 1 label, correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=5, expected=1.0 / 1,
class_id=5)
# Class 5: 1 label, correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=1.0 / 1, class_id=5)
# Class 7: 1 label, incorrect.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1,
class_id=7)
# Class 7: 1 label, incorrect.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=0.0 / 1, class_id=7)
# All classes: 6 labels, 3 correct.
self._test_streaming_sparse_recall_at_k(
predictions=predictions, labels=sp_labels, k=5, expected=3.0 / 6)
# All classes: 6 labels, 3 correct.
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=3.0 / 6)
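The all-classes recall 3.0 / 6 uses the same top-5 sets as the precision test above, but with the 6 labels as the denominator: row 0 recovers 1 of its 3 labels ({2} out of {2, 7, 8}) and row 1 recovers 2 of 3 ({2, 5} out of {1, 2, 5}). A quick check:

import numpy as np

predictions = np.array([
    [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
    [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]])
labels = [{2, 7, 8}, {1, 2, 5}]
top_5 = np.argsort(-predictions, axis=1)[:, :5]
found = sum(len(set(row) & labels[i]) for i, row in enumerate(top_5))
print(found / 6.0)  # 0.5 == 3.0 / 6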
def test_3d_no_labels(self):
def test_3d_nan(self):
predictions = [[
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9],
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]
@ -2305,19 +2330,26 @@ class StreamingSparseRecallTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
sparse_labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
dense_labels = np.array([[
[2, 7, 8],
[1, 2, 5]
], [
[1, 2, 5],
[2, 7, 8],
]], dtype=np.int64)
# Classes 0,3,4,6,9 have 0 labels, class 10 is out of range.
for class_id in [0, 3, 4, 6, 9, 10]:
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id)
for labels in (sparse_labels, dense_labels):
# Classes 0,3,4,6,9 have 0 labels, class 10 is out of range.
for class_id in (0, 3, 4, 6, 9, 10):
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=NAN, class_id=class_id)
def test_3d_no_predictions(self):
predictions = [[
@ -2327,19 +2359,26 @@ class StreamingSparseRecallTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
sparse_labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
dense_labels = np.array([[
[2, 7, 8],
[1, 2, 5]
], [
[1, 2, 5],
[2, 7, 8],
]], dtype=np.int64)
# Classes 1,8 have 0 predictions, >=1 label.
for class_id in [1, 8]:
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=0.0, class_id=class_id)
for labels in (sparse_labels, dense_labels):
# Classes 1,8 have 0 predictions, >=1 label.
for class_id in (1, 8):
self._test_streaming_sparse_recall_at_k(
predictions, labels, k=5, expected=0.0, class_id=class_id)
def test_3d(self):
predictions = [[
@ -2349,30 +2388,29 @@ class StreamingSparseRecallTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
# Class 2: 4 labels, all correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=4.0 / 4, class_id=2)
predictions, labels, k=5, expected=4.0 / 4, class_id=2)
# Class 5: 2 labels, both correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2, class_id=5)
predictions, labels, k=5, expected=2.0 / 2, class_id=5)
# Class 7: 2 labels, 1 incorrect.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=1.0 / 2, class_id=7)
predictions, labels, k=5, expected=1.0 / 2, class_id=7)
# All classes: 12 labels, 7 correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=7.0 / 12)
predictions, labels, k=5, expected=7.0 / 12)
def test_3d_ignore_all(self):
predictions = [[
@ -2382,27 +2420,26 @@ class StreamingSparseRecallTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
for class_id in xrange(10):
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id,
predictions, labels, k=5, expected=NAN, class_id=class_id,
weights=[[0], [0]])
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=class_id,
predictions, labels, k=5, expected=NAN, class_id=class_id,
weights=[[0, 0], [0, 0]])
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=NAN,
ignore_mask=[[False], [True]], weights=[[0], [1]])
predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]],
weights=[[0], [1]])
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]])
predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]])
def test_3d_ignore_some(self):
predictions = [[
@ -2412,43 +2449,42 @@ class StreamingSparseRecallTest(tf.test.TestCase):
[0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6],
[0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9]
]]
labels = [[
labels = _binary_3d_label_to_sparse_value([[
[0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
[0, 1, 1, 0, 0, 1, 0, 0, 0, 0]
], [
[0, 1, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
]]
sp_labels = _binary_3d_label_to_sparse_value(labels)
]])
# Class 2: 2 labels, both correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2,
predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
ignore_mask=[[False], [False]], weights=[[1], [0]])
# Class 2: 2 labels, both correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2,
predictions, labels, k=5, expected=2.0 / 2.0, class_id=2,
ignore_mask=[[False], [False]], weights=[[0], [1]])
# Class 7: 1 label, correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=1.0 / 1.0, class_id=7,
predictions, labels, k=5, expected=1.0 / 1.0, class_id=7,
ignore_mask=[[True], [False]], weights=[[1], [1]])
# Class 7: 1 label, incorrect.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=0.0 / 1.0, class_id=7,
predictions, labels, k=5, expected=0.0 / 1.0, class_id=7,
ignore_mask=[[False], [True]], weights=[[1], [1]])
# Class 7: 2 labels, 1 correct.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=1.0 / 2.0, class_id=7,
predictions, labels, k=5, expected=1.0 / 2.0, class_id=7,
weights=[[1, 0], [1, 0]])
# Class 7: no labels.
self._test_streaming_sparse_recall_at_k(
predictions, sp_labels, k=5, expected=NAN, class_id=7,
predictions, labels, k=5, expected=NAN, class_id=7,
weights=[[0, 1], [0, 1]])
def test_sparse_tensor_value(self):
@ -3678,6 +3714,22 @@ class StreamingConcatTest(tf.test.TestCase):
with self.assertRaises(ValueError):
metrics.streaming_concat(tf.placeholder(tf.float32, [None, None]))
def testStreamingConcatReset(self):
with self.test_session() as sess:
values = tf.placeholder(tf.int32, [None])
concatenated, update_op = metrics.streaming_concat(values)
sess.run(tf.initialize_local_variables())
self.assertAllEqual([], concatenated.eval())
sess.run([update_op], feed_dict={values: [0, 1, 2]})
self.assertAllEqual([0, 1, 2], concatenated.eval())
sess.run(tf.initialize_local_variables())
sess.run([update_op], feed_dict={values: [3, 4]})
self.assertAllEqual([3, 4], concatenated.eval())
class AggregateMetricsTest(tf.test.TestCase):
@ -3928,7 +3980,8 @@ class ExpandAndTileTest(tf.test.TestCase):
indices=[[0, i[0], i[1]] for i in x.indices], values=x.values,
shape=[1, 3, 3])
self._assert_sparse_tensors_equal(
expected_result_dim0, metric_ops.expand_and_tile(x, multiple=1).eval())
expected_result_dim0,
metric_ops.expand_and_tile(x, multiple=1).eval())
for dim in (-2, 0):
self._assert_sparse_tensors_equal(
expected_result_dim0,

View File

@ -11,6 +11,7 @@ licenses(["notice"]) # Apache 2.0
load(
"//tensorflow:tensorflow.bzl",
"tf_cc_test",
"tf_kernel_library",
)
filegroup(
@ -43,3 +44,36 @@ tf_cc_test(
"//tensorflow/core/kernels:ops_util",
],
)
tf_cc_test(
name = "graph_transferer_test",
size = "small",
srcs = ["graph_transferer_test.cc"],
deps = [
"//tensorflow/cc:cc_ops",
"//tensorflow/contrib/quantization/kernels/hexagon:graph_transferer",
"//tensorflow/core:core_cpu",
"//tensorflow/core:direct_session",
"//tensorflow/core:lib",
"//tensorflow/core:ops",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)
tf_kernel_library(
name = "graph_transferer",
srcs = [
"graph_transferer.cc",
],
hdrs = [
"graph_transferer.h",
],
deps = [
"//tensorflow/core",
"//tensorflow/core:framework",
"//third_party/eigen3",
],
)

Some files were not shown because too many files have changed in this diff.