diff --git a/configure b/configure index 426071e48d0..08078d29d5c 100755 --- a/configure +++ b/configure @@ -109,7 +109,7 @@ fi ## Find swig path if [ -z "$SWIG_PATH" ]; then - SWIG_PATH=`type -p swig 2> /dev/null` + SWIG_PATH=`type -p swig 2> /dev/null || true` fi if [[ ! -e "$SWIG_PATH" ]]; then echo "Can't find swig. Ensure swig is in \$PATH or set \$SWIG_PATH." diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index e0cdf06938c..bb2a6063b5c 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -28,7 +28,6 @@ cc_library( deps = [ ":constants", "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", @@ -45,7 +44,9 @@ tf_cc_test( deps = [ ":constants", ":loader", + ":signature_constants", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", diff --git a/tensorflow/cc/saved_model/constants.h b/tensorflow/cc/saved_model/constants.h index fba1dca5345..b97f6c84faf 100644 --- a/tensorflow/cc/saved_model/constants.h +++ b/tensorflow/cc/saved_model/constants.h @@ -18,6 +18,12 @@ limitations under the License. namespace tensorflow { +// SavedModel assets directory. +constexpr char kSavedModelAssetsDirectory[] = "assets"; + +// SavedModel assets key for graph collection-def. +constexpr char kSavedModelAssetsKey[] = "saved_model_assets"; + // SavedModel proto filename. constexpr char kSavedModelFilenamePb[] = "saved_model.pb"; diff --git a/tensorflow/cc/saved_model/loader_test.cc b/tensorflow/cc/saved_model/loader_test.cc index b3366dec4a3..fc21266518f 100644 --- a/tensorflow/cc/saved_model/loader_test.cc +++ b/tensorflow/cc/saved_model/loader_test.cc @@ -16,6 +16,9 @@ limitations under the License. #include "tensorflow/cc/saved_model/loader.h" #include "tensorflow/cc/saved_model/constants.h" +#include "tensorflow/cc/saved_model/signature_constants.h" +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/example/feature.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -34,17 +37,35 @@ class LoaderTest : public ::testing::Test { protected: LoaderTest() {} - void CheckSavedModelBundle(const SavedModelBundle& bundle) { - // Validate the half plus two behavior. - Tensor input = test::AsTensor({0, 1, 2, 3}, TensorShape({4, 1})); + string MakeSerializedExample(float x) { + tensorflow::Example example; + auto* feature_map = example.mutable_features()->mutable_feature(); + (*feature_map)["x"].mutable_float_list()->add_value(x); + return example.SerializeAsString(); + } + + void CheckSavedModelBundle(const string& export_dir, + const SavedModelBundle& bundle) { + const string asset_path = + io::JoinPath(export_dir, kSavedModelAssetsDirectory, "foo.txt"); + EXPECT_TRUE(Env::Default()->FileExists(asset_path)); // Retrieve the regression signature from meta graph def. 
const auto signature_def_map = bundle.meta_graph_def.signature_def(); - const auto signature_def = signature_def_map.at("regression"); + const auto signature_def = signature_def_map.at(kRegressMethodName); - const string input_name = signature_def.inputs().at("input").name(); - const string output_name = signature_def.outputs().at("output").name(); + const string input_name = signature_def.inputs().at(kRegressInputs).name(); + const string output_name = + signature_def.outputs().at(kRegressOutputs).name(); + std::vector serialized_examples; + for (float x : {0, 1, 2, 3}) { + serialized_examples.push_back(MakeSerializedExample(x)); + } + + // Validate the half plus two behavior. + Tensor input = + test::AsTensor(serialized_examples, TensorShape({4})); std::vector outputs; TF_ASSERT_OK(bundle.session->Run({{input_name, input}}, {output_name}, {}, &outputs)); @@ -65,11 +86,11 @@ TEST_F(LoaderTest, ResourceLeakTest) { RunOptions run_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); for (int i = 0; i < 100; ++i) { TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } } @@ -79,10 +100,10 @@ TEST_F(LoaderTest, TagMatch) { RunOptions run_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } TEST_F(LoaderTest, NoTagMatch) { @@ -91,7 +112,7 @@ TEST_F(LoaderTest, NoTagMatch) { SessionOptions session_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); Status st = LoadSavedModel(session_options, run_options, export_dir, {"missing-tag"}, &bundle); EXPECT_FALSE(st.ok()); @@ -107,7 +128,7 @@ TEST_F(LoaderTest, NoTagMatchMultiple) { SessionOptions session_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); Status st = LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe, "missing-tag"}, &bundle); EXPECT_FALSE(st.ok()); @@ -126,19 +147,19 @@ TEST_F(LoaderTest, PbtxtFormat) { io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPbTxt); TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } -TEST_F(LoaderTest, ShardedVariables) { +TEST_F(LoaderTest, SingleShardVariables) { SavedModelBundle bundle; SessionOptions session_options; RunOptions run_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } TEST_F(LoaderTest, InvalidExportPath) { @@ -156,7 +177,7 @@ TEST_F(LoaderTest, InvalidExportPath) { TEST_F(LoaderTest, MaybeSavedModelDirectory) { // Valid SavedModel directory. 
const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); EXPECT_TRUE(MaybeSavedModelDirectory(export_dir)); // Directory that does not exist. diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt new file mode 100644 index 00000000000..f9ff0366880 --- /dev/null +++ b/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt @@ -0,0 +1 @@ +asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb index 5a2dd4dd841..d0f0853aa87 100644 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb and b/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/checkpoint b/tensorflow/cc/saved_model/testdata/half_plus_two/variables/checkpoint deleted file mode 100644 index 88f46487280..00000000000 --- a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "/tmp/saved_model/half_plus_two/variables/variables" -all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two/variables/variables" diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/assets/foo.txt new file mode 100644 index 00000000000..f9ff0366880 --- /dev/null +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/assets/foo.txt @@ -0,0 +1 @@ +asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt index 30c2c25a197..2e714d262db 100644 --- a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt @@ -140,6 +140,88 @@ meta_graphs { op { name: "NoOp" } + op { + name: "ParseExample" + input_arg { + name: "serialized" + type: DT_STRING + } + input_arg { + name: "names" + type: DT_STRING + } + input_arg { + name: "sparse_keys" + type: DT_STRING + number_attr: "Nsparse" + } + input_arg { + name: "dense_keys" + type: DT_STRING + number_attr: "Ndense" + } + input_arg { + name: "dense_defaults" + type_list_attr: "Tdense" + } + output_arg { + name: "sparse_indices" + type: DT_INT64 + number_attr: "Nsparse" + } + output_arg { + name: "sparse_values" + type_list_attr: "sparse_types" + } + output_arg { + name: "sparse_shapes" + type: DT_INT64 + number_attr: "Nsparse" + } + output_arg { + name: "dense_values" + type_list_attr: "Tdense" + } + attr { + name: "Nsparse" + type: "int" + has_minimum: true + } + attr { + name: "Ndense" + type: "int" + has_minimum: true + } + attr { + name: "sparse_types" + type: "list(type)" + has_minimum: true + allowed_values { + list { + type: DT_FLOAT + type: DT_INT64 + type: DT_STRING + } + } + } + attr { + name: "Tdense" + type: "list(type)" + has_minimum: true + allowed_values { + list { + type: DT_FLOAT + type: DT_INT64 + type: DT_STRING + } + } + } + attr { + name: "dense_shapes" + type: "list(shape)" + has_minimum: true + } + } op { name: "Placeholder" output_arg { @@ -160,33 +242,28 @@ meta_graphs { } } op { - name: "RestoreSlice" + name: "RestoreV2" input_arg { - name: "file_pattern" + name: 
"prefix" type: DT_STRING } input_arg { - name: "tensor_name" + name: "tensor_names" type: DT_STRING } input_arg { - name: "shape_and_slice" + name: "shape_and_slices" type: DT_STRING } output_arg { - name: "tensor" - type_attr: "dt" + name: "tensors" + type_list_attr: "dtypes" } attr { - name: "dt" - type: "type" - } - attr { - name: "preferred_shard" - type: "int" - default_value { - i: -1 - } + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 } } op { @@ -214,6 +291,40 @@ meta_graphs { minimum: 1 } } + op { + name: "ShardedFilename" + input_arg { + name: "basename" + type: DT_STRING + } + input_arg { + name: "shard" + type: DT_INT32 + } + input_arg { + name: "num_shards" + type: DT_INT32 + } + output_arg { + name: "filename" + type: DT_STRING + } + } + op { + name: "ShardedFilespec" + input_arg { + name: "basename" + type: DT_STRING + } + input_arg { + name: "num_shards" + type: DT_INT32 + } + output_arg { + name: "filename" + type: DT_STRING + } + } op { name: "Variable" output_arg { @@ -524,7 +635,7 @@ meta_graphs { } } node { - name: "x" + name: "tf_example" op: "Placeholder" attr { key: "_output_shapes" @@ -539,7 +650,7 @@ meta_graphs { attr { key: "dtype" value { - type: DT_FLOAT + type: DT_STRING } } attr { @@ -550,6 +661,190 @@ meta_graphs { } } } + node { + name: "ParseExample/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + } + node { + name: "ParseExample/ParseExample/names" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } + } + node { + name: "ParseExample/ParseExample/dense_keys_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "x" + } + } + } + } + node { + name: "ParseExample/ParseExample" + op: "ParseExample" + input: "tf_example" + input: "ParseExample/ParseExample/names" + input: "ParseExample/ParseExample/dense_keys_0" + input: "ParseExample/Const" + attr { + key: "Ndense" + value { + i: 1 + } + } + attr { + key: "Nsparse" + value { + i: 0 + } + } + attr { + key: "Tdense" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "sparse_types" + value { + list { + } + } + } + } + node { + name: "x" + op: "Identity" + input: "ParseExample/ParseExample" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + } node { name: "Mul" op: "Mul" @@ -566,7 +861,12 @@ meta_graphs { value { list { shape { - unknown_rank: true + dim { + size: -1 + } + dim { + size: 1 + } } } } @@ -588,7 +888,38 @@ meta_graphs { value { list { shape { - unknown_rank: true + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + } + node { + name: "Identity" + op: "Identity" + input: 
"y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } } } } @@ -630,6 +961,82 @@ meta_graphs { } } } + node { + name: "save/num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node { + name: "save/ShardedFilename/shard" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + } + node { + name: "save/ShardedFilename" + op: "ShardedFilename" + input: "save/Const" + input: "save/ShardedFilename/shard" + input: "save/num_shards" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } node { name: "save/save/tensor_names" op: "Const" @@ -707,7 +1114,7 @@ meta_graphs { node { name: "save/save" op: "SaveSlices" - input: "save/Const" + input: "save/ShardedFilename" input: "save/save/tensor_names" input: "save/save/shapes_and_slices" input: "a" @@ -725,7 +1132,7 @@ meta_graphs { node { name: "save/control_dependency" op: "Identity" - input: "save/Const" + input: "save/ShardedFilename" input: "^save/save" attr { key: "T" @@ -737,7 +1144,7 @@ meta_graphs { key: "_class" value { list { - s: "loc:@save/Const" + s: "loc:@save/ShardedFilename" } } } @@ -752,13 +1159,32 @@ meta_graphs { } } node { - name: "save/restore_slice/tensor_name" + name: "save/ShardedFilespec" + op: "ShardedFilespec" + input: "save/Const" + input: "save/num_shards" + input: "^save/control_dependency" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node { + name: "save/RestoreV2/tensor_names" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -775,6 +1201,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape { + dim { + size: 1 + } } string_val: "a" } @@ -782,13 +1211,16 @@ meta_graphs { } } node { - name: "save/restore_slice/shape_and_slice" + name: "save/RestoreV2/shape_and_slices" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -805,6 +1237,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape { + dim { + size: 1 + } } string_val: "" } @@ -812,11 +1247,11 @@ meta_graphs { } } node { - name: "save/restore_slice" - op: "RestoreSlice" + name: "save/RestoreV2" + op: "RestoreV2" input: "save/Const" - input: "save/restore_slice/tensor_name" - input: "save/restore_slice/shape_and_slice" + input: "save/RestoreV2/tensor_names" + input: "save/RestoreV2/shape_and_slices" attr { key: "_output_shapes" value { @@ -828,15 +1263,11 @@ meta_graphs { } } attr { - key: "dt" + key: "dtypes" value { - type: DT_FLOAT - } - } - attr { - key: "preferred_shard" - value { - i: -1 + list { + type: DT_FLOAT + } } } } @@ -844,7 +1275,7 @@ meta_graphs { name: "save/Assign" op: "Assign" input: "a" - input: "save/restore_slice" + input: "save/RestoreV2" attr { key: "T" value { @@ -882,13 +1313,16 @@ meta_graphs { } } node { - name: "save/restore_slice_1/tensor_name" + name: "save/RestoreV2_1/tensor_names" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -905,6 +1339,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape 
{ + dim { + size: 1 + } } string_val: "b" } @@ -912,13 +1349,16 @@ meta_graphs { } } node { - name: "save/restore_slice_1/shape_and_slice" + name: "save/RestoreV2_1/shape_and_slices" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -935,6 +1375,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape { + dim { + size: 1 + } } string_val: "" } @@ -942,11 +1385,11 @@ meta_graphs { } } node { - name: "save/restore_slice_1" - op: "RestoreSlice" + name: "save/RestoreV2_1" + op: "RestoreV2" input: "save/Const" - input: "save/restore_slice_1/tensor_name" - input: "save/restore_slice_1/shape_and_slice" + input: "save/RestoreV2_1/tensor_names" + input: "save/RestoreV2_1/shape_and_slices" attr { key: "_output_shapes" value { @@ -958,15 +1401,11 @@ meta_graphs { } } attr { - key: "dt" + key: "dtypes" value { - type: DT_FLOAT - } - } - attr { - key: "preferred_shard" - value { - i: -1 + list { + type: DT_FLOAT + } } } } @@ -974,7 +1413,7 @@ meta_graphs { name: "save/Assign_1" op: "Assign" input: "b" - input: "save/restore_slice_1" + input: "save/RestoreV2_1" attr { key: "T" value { @@ -1012,20 +1451,26 @@ meta_graphs { } } node { - name: "save/restore_all" + name: "save/restore_shard" op: "NoOp" input: "^save/Assign" input: "^save/Assign_1" } + node { + name: "save/restore_all" + op: "NoOp" + input: "^save/restore_shard" + } versions { producer: 15 } } saver_def { filename_tensor_name: "save/Const:0" - save_tensor_name: "save/control_dependency:0" + save_tensor_name: "save/ShardedFilespec:0" restore_op_name: "save/restore_all" max_to_keep: 5 + sharded: true keep_checkpoint_every_n_hours: 10000.0 version: V1 } @@ -1048,21 +1493,21 @@ meta_graphs { } } signature_def { - key: "regression" + key: "tensorflow/serving/regress" value { inputs { - key: "input" + key: "inputs" value { - name: "x:0" + name: "tf_example:0" } } outputs { - key: "output" + key: "outputs" value { - name: "y:0" + name: "Identity:0" } } - method_name: "regression" + method_name: "tensorflow/serving/regress" } } } diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/variables/checkpoint b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/variables/checkpoint deleted file mode 100644 index 76c6cefbbbd..00000000000 --- a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/variables/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "/tmp/saved_model/half_plus_two_pbtxt/variables/variables-?????-of-00001" -all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two_pbtxt/variables/variables-?????-of-00001" diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/assets/foo.txt new file mode 100644 index 00000000000..f9ff0366880 --- /dev/null +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/assets/foo.txt @@ -0,0 +1 @@ +asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb index 0a87f3306f5..d0f0853aa87 100644 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb and b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/variables/checkpoint b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/variables/checkpoint deleted file mode 100644 index 1065013315f..00000000000 --- 
a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/variables/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "/tmp/saved_model/half_plus_two/variables/variables-?????-of-00001" -all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two/variables/variables-?????-of-00001" diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 53e0af10636..36bc4072382 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -97,6 +97,7 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member +from tensorflow.contrib.distributions.python.ops import bijector from tensorflow.contrib.distributions.python.ops.bernoulli import * from tensorflow.contrib.distributions.python.ops.beta import * from tensorflow.contrib.distributions.python.ops.binomial import * diff --git a/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py b/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py index c4eea35dc12..e1a8a6d6025 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py @@ -245,6 +245,23 @@ class BetaTest(tf.test.TestCase): stats.beta.var(a, b), atol=1e-1) + # Test that sampling with the same seed twice gives the same results. + def testBetaSampleMultipleTimes(self): + with self.test_session(): + a_val = 1. + b_val = 2. + n_val = 100 + + tf.set_random_seed(654321) + beta1 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta1") + samples1 = beta1.sample_n(n_val, seed=123456).eval() + + tf.set_random_seed(654321) + beta2 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta2") + samples2 = beta2.sample_n(n_val, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + def testBetaSampleMultidimensional(self): with self.test_session(): a = np.random.rand(3, 2, 2).astype(np.float32) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py index fe4ac931719..d05f3cfe316 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py @@ -23,9 +23,20 @@ import math import numpy as np import tensorflow as tf -from tensorflow.contrib.distributions.python.ops.bijector import _Exp -from tensorflow.contrib.distributions.python.ops.bijector import _Identity -from tensorflow.contrib.distributions.python.ops.bijector import _ShiftAndScale +bijectors = tf.contrib.distributions.bijector +rng = np.random.RandomState(42) + + +class BaseBijectorTest(tf.test.TestCase): + """Tests properties of the Bijector base-class.""" + + def testBijector(self): + with self.test_session(): + with self.assertRaisesRegexp( + TypeError, + ("Can't instantiate abstract class Bijector " + "with abstract methods __init__")): + bijectors.Bijector() class IdentityBijectorTest(tf.test.TestCase): @@ -33,7 +44,7 @@ class IdentityBijectorTest(tf.test.TestCase): def testBijector(self): with self.test_session(): - bijector = _Identity() + bijector = bijectors.Identity() self.assertEqual("Identity", bijector.name) x = [[[0.], [1.]]] @@ -50,7 +61,7 @@ class ExpBijectorTest(tf.test.TestCase): def testBijector(self): with self.test_session(): - bijector = _Exp(event_ndims=1) + bijector = bijectors.Exp(event_ndims=1) self.assertEqual("Exp", 
bijector.name) x = [[[1.], [2.]]] @@ -63,14 +74,39 @@ class ExpBijectorTest(tf.test.TestCase): self.assertAllClose([[0., -math.log(2.)]], jac.eval()) -class _ShiftAndScaleBijectorTest(tf.test.TestCase): +class InlineBijectorTest(tf.test.TestCase): + """Tests the correctness of the inline constructed bijector.""" + + def testBijector(self): + with self.test_session(): + exp = bijectors.Exp(event_ndims=1) + inline = bijectors.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(x), reduction_indices=-1)), + name="Exp") + + self.assertEqual(exp.name, inline.name) + x = [[[1., 2.], + [3., 4.], + [5., 6.]]] + self.assertAllClose(exp.forward(x).eval(), inline.forward(x).eval()) + self.assertAllClose(exp.inverse(x).eval(), inline.inverse(x).eval()) + self.assertAllClose(exp.inverse_log_det_jacobian(x).eval(), + inline.inverse_log_det_jacobian(x).eval()) + + +class ScaleAndShiftBijectorTest(tf.test.TestCase): + """Tests the correctness of the Y = scale * x + loc transformation.""" def testProperties(self): with self.test_session(): mu = -1. sigma = 2. - bijector = _ShiftAndScale(loc=mu, scale=sigma) - self.assertEqual("ShiftAndScale", bijector.name) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) + self.assertEqual("ScaleAndShift", bijector.name) def testNoBatchScalar(self): with self.test_session() as sess: @@ -85,7 +121,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = -1. sigma = 2. # Scalar. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(0, bijector.shaper.event_ndims.eval()) # "is scalar" x = [1., 2, 3] # Three scalar samples (no batches). @@ -107,7 +144,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = -1. sigma = 2. # Scalar. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(0, bijector.shaper.event_ndims.eval()) # "is scalar" x = [[1., 2, 3], @@ -134,7 +172,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [1.] sigma = [1.] # One batch, scalar. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual( 1, bijector.shaper.batch_ndims.eval()) # "one batch dim" self.assertEqual( @@ -158,7 +197,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [1., -1] sigma = [1., 1] # Univariate, two batches. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual( 1, bijector.shaper.batch_ndims.eval()) # "one batch dim" self.assertEqual( @@ -182,7 +222,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [1., -1] sigma = np.eye(2, dtype=np.float32) - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=1) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(1, bijector.shaper.event_ndims.eval()) # "is vector" x = [1., 1] @@ -205,7 +246,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = 1. 
sigma = np.eye(2, dtype=np.float32) - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=1) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(1, bijector.shaper.event_ndims.eval()) # "is vector" x = [1., 1] @@ -231,7 +273,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): feed_dict = {x: x_value, mu: mu_value, sigma: sigma_value, event_ndims: event_ndims_value} - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=event_ndims) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=event_ndims) self.assertEqual(0, sess.run(bijector.shaper.batch_ndims, feed_dict)) self.assertEqual(1, sess.run(bijector.shaper.event_ndims, feed_dict)) self.assertAllClose([[2., 0]], sess.run(bijector.forward(x), feed_dict)) @@ -252,7 +295,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [[1., -1]] sigma = np.array([np.eye(2, dtype=np.float32)]) - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=1) self.assertEqual( 1, bijector.shaper.batch_ndims.eval()) # "one batch dim" self.assertEqual( @@ -276,7 +320,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): feed_dict = {x: x_value, mu: mu_value, sigma: sigma_value, event_ndims: event_ndims_value} - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=event_ndims) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=event_ndims) self.assertEqual(1, sess.run(bijector.shaper.batch_ndims, feed_dict)) self.assertEqual(1, sess.run(bijector.shaper.event_ndims, feed_dict)) self.assertAllClose([[[2., 0]]], sess.run(bijector.forward(x), feed_dict)) @@ -285,5 +330,65 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): [0.], sess.run(bijector.inverse_log_det_jacobian(x), feed_dict)) +class SoftplusBijectorTest(tf.test.TestCase): + """Tests the correctness of the Y = g(X) = Log[1 + exp(X)] transformation.""" + + def _softplus(self, x): + return np.log(1 + np.exp(x)) + + def _softplus_inverse(self, y): + return np.log(np.exp(y) - 1) + + def _softplus_ildj_before_reduction(self, y): + """Inverse log det jacobian, before being reduced.""" + return -np.log(1 - np.exp(-y)) + + def testBijectorForwardInverseEventDimsZero(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=0) + self.assertEqual("Softplus", bijector.name) + x = 2 * rng.randn(2, 10) + y = self._softplus(x) + + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + x, bijector.inverse_and_inverse_log_det_jacobian(y)[0].eval()) + + def testBijectorLogDetJacobianEventDimsZero(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=0) + y = 2 * rng.rand(2, 10) + # No reduction needed if event_dims = 0. 
+ ildj = self._softplus_ildj_before_reduction(y) + + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_and_inverse_log_det_jacobian(y)[1].eval()) + + def testBijectorForwardInverseEventDimsOne(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=1) + self.assertEqual("Softplus", bijector.name) + x = 2 * rng.randn(2, 10) + y = self._softplus(x) + + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + x, bijector.inverse_and_inverse_log_det_jacobian(y)[0].eval()) + + def testBijectorLogDetJacobianEventDimsOne(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=1) + y = 2 * rng.rand(2, 10) + ildj_before = self._softplus_ildj_before_reduction(y) + ildj = np.sum(ildj_before, axis=1) + + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_and_inverse_log_det_jacobian(y)[1].eval()) + + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py index f871f1961c8..af15f36522a 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py @@ -334,6 +334,32 @@ class MixtureTest(tf.test.TestCase): which_dist_samples = dist_sample_values[c][:size_c] self.assertAllClose(which_dist_samples, sample_values[which_c]) + # Test that sampling with the same seed twice gives the same results. + def testSampleMultipleTimes(self): + # 5 component mixture. + logits = [-10.0, -5.0, 0.0, 5.0, 10.0] + mus = [-5.0, 0.0, 5.0, 4.0, 20.0] + sigmas = [0.1, 5.0, 3.0, 0.2, 4.0] + + with self.test_session(): + n = 100 + + tf.set_random_seed(654321) + components = [distributions_py.Normal( + mu=mu, sigma=sigma) for mu, sigma in zip(mus, sigmas)] + cat = distributions_py.Categorical(logits, dtype=tf.int32, name="cat1") + dist1 = distributions_py.Mixture(cat, components, name="mixture1") + samples1 = dist1.sample_n(n, seed=123456).eval() + + tf.set_random_seed(654321) + components2 = [distributions_py.Normal( + mu=mu, sigma=sigma) for mu, sigma in zip(mus, sigmas)] + cat2 = distributions_py.Categorical(logits, dtype=tf.int32, name="cat2") + dist2 = distributions_py.Mixture(cat2, components2, name="mixture2") + samples2 = dist2.sample_n(n, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + def testSampleScalarBatchMultivariate(self): with self.test_session() as sess: num_components = 3 diff --git a/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py b/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py index bf0d6f94900..c78ca2d6439 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py @@ -108,20 +108,54 @@ class StudentTTest(tf.test.TestCase): df_v = 4.0 mu_v = 3.0 sigma_v = np.sqrt(10.0) - n = tf.constant(100000) + n = tf.constant(200000) student = tf.contrib.distributions.StudentT(df=df, mu=mu, sigma=sigma) - samples = student.sample_n(n, seed=137) + samples = student.sample_n(n) sample_values = samples.eval() - n = 100000 - self.assertEqual(sample_values.shape, (n,)) - self.assertAllClose(sample_values.mean(), mu_v, atol=1e-2) + n_val = 200000 + self.assertEqual(sample_values.shape, 
(n_val,)) + self.assertAllClose(sample_values.mean(), mu_v, rtol=1e-2, atol=0) self.assertAllClose(sample_values.var(), sigma_v**2 * df_v / (df_v - 2), - atol=.25) + rtol=1e-2, atol=0) self._checkKLApprox(df_v, mu_v, sigma_v, sample_values) - def _testStudentSampleMultiDimensional(self): - # DISABLED: Please enable this test once b/issues/30149644 is resolved. + # Test that sampling with the same seed twice gives the same results. + def testStudentSampleMultipleTimes(self): + with self.test_session(): + df = tf.constant(4.0) + mu = tf.constant(3.0) + sigma = tf.constant(math.sqrt(10.0)) + df_v = 4.0 + mu_v = 3.0 + sigma_v = np.sqrt(10.0) + n = tf.constant(100) + + tf.set_random_seed(654321) + student = tf.contrib.distributions.StudentT( + df=df, mu=mu, sigma=sigma, name="student_t1") + samples1 = student.sample_n(n, seed=123456).eval() + + tf.set_random_seed(654321) + student2 = tf.contrib.distributions.StudentT( + df=df, mu=mu, sigma=sigma, name="student_t2") + samples2 = student2.sample_n(n, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + + def testStudentSampleSmallDfNoNan(self): + with self.test_session(): + df_v = [1e-1, 1e-5, 1e-10, 1e-20] + df = tf.constant(df_v) + n = tf.constant(200000) + student = tf.contrib.distributions.StudentT(df=df, mu=1.0, sigma=1.0) + samples = student.sample_n(n) + sample_values = samples.eval() + n_val = 200000 + self.assertEqual(sample_values.shape, (n_val, 4)) + self.assertTrue(np.all(np.logical_not(np.isnan(sample_values)))) + + def testStudentSampleMultiDimensional(self): with self.test_session(): batch_size = 7 df = tf.constant([[3.0, 7.0]] * batch_size) @@ -130,20 +164,22 @@ class StudentTTest(tf.test.TestCase): df_v = [3.0, 7.0] mu_v = [3.0, -3.0] sigma_v = [np.sqrt(10.0), np.sqrt(15.0)] - n = tf.constant(100000) + n = tf.constant(200000) student = tf.contrib.distributions.StudentT(df=df, mu=mu, sigma=sigma) samples = student.sample_n(n) sample_values = samples.eval() - self.assertEqual(samples.get_shape(), (100000, batch_size, 2)) - self.assertAllClose(sample_values[:, 0, 0].mean(), mu_v[0], atol=.15) + self.assertEqual(samples.get_shape(), (200000, batch_size, 2)) + self.assertAllClose( + sample_values[:, 0, 0].mean(), mu_v[0], rtol=1e-2, atol=0) self.assertAllClose(sample_values[:, 0, 0].var(), sigma_v[0]**2 * df_v[0] / (df_v[0] - 2), - atol=1) + rtol=1e-1, atol=0) self._checkKLApprox(df_v[0], mu_v[0], sigma_v[0], sample_values[:, 0, 0]) - self.assertAllClose(sample_values[:, 0, 1].mean(), mu_v[1], atol=.01) + self.assertAllClose( + sample_values[:, 0, 1].mean(), mu_v[1], rtol=1e-2, atol=0) self.assertAllClose(sample_values[:, 0, 1].var(), sigma_v[1]**2 * df_v[1] / (df_v[1] - 2), - atol=.25) + rtol=1e-1, atol=0) self._checkKLApprox(df_v[0], mu_v[0], sigma_v[0], sample_values[:, 0, 1]) def _checkKLApprox(self, df, mu, sigma, samples): @@ -337,8 +373,7 @@ class StudentTTest(tf.test.TestCase): mode = student.mode().eval() self.assertAllClose([-1., 0, 1], mode) - def _testPdfOfSample(self): - # DISABLED: Please enable this test once b/issues/30149644 is resolved. + def testPdfOfSample(self): with self.test_session() as sess: student = tf.contrib.distributions.StudentT(df=3., mu=np.pi, sigma=1.) num = 20000 @@ -357,8 +392,7 @@ class StudentTTest(tf.test.TestCase): # Verify integral over sample*pdf ~= 1. self._assertIntegral(sample_vals, pdf_vals) - def _testPdfOfSampleMultiDims(self): - # DISABLED: Please enable this test once b/issues/30149644 is resolved. 
+ def testPdfOfSampleMultiDims(self): with self.test_session() as sess: student = tf.contrib.distributions.StudentT(df=[7., 11.], mu=[[5.], [6.]], diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py index c97473cf4f0..ba6a998d466 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py @@ -33,12 +33,8 @@ class TransformedDistributionTest(tf.test.TestCase): # Note: the Jacobian callable only works for this example; more generally # you may or may not need a reduce_sum. log_normal = tf.contrib.distributions.TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.exp(x), - inverse=lambda y: tf.log(y), - log_det_jacobian=(lambda x: x)) + base_distribution=tf.contrib.distributions.Normal(mu=mu, sigma=sigma), + bijector=tf.contrib.distributions.bijector.Exp(event_ndims=0)) sp_dist = stats.lognorm(s=sigma, scale=np.exp(mu)) # sample @@ -67,12 +63,8 @@ class TransformedDistributionTest(tf.test.TestCase): mu = 3.0 sigma = 0.02 log_normal = tf.contrib.distributions.TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.exp(x), - inverse=None, - log_det_jacobian=(lambda x: tf.reduce_sum(x))) + base_distribution=tf.contrib.distributions.Normal(mu=mu, sigma=sigma), + bijector=tf.contrib.distributions.bijector.Exp(event_ndims=0)) sample = log_normal.sample_n(1) sample_val, log_pdf_val = sess.run([sample, log_normal.log_pdf(sample)]) @@ -82,10 +74,6 @@ class TransformedDistributionTest(tf.test.TestCase): log_pdf_val, atol=1e-2) - with self.assertRaisesRegexp(ValueError, - "was not returned from `sample`"): - log_normal.log_pdf(tf.constant(3.0)) - if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py b/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py index 521b0d4b2dd..8a0cf5ae1d9 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py @@ -149,6 +149,30 @@ class WishartCholeskyTest(tf.test.TestCase): variance_estimate, rtol=0.05) + # Test that sampling with the same seed twice gives the same results. + def testSampleMultipleTimes(self): + with self.test_session(): + df = 4. 
+ n_val = 100 + + tf.set_random_seed(654321) + chol_w1 = distributions.WishartCholesky( + df=df, + scale=chol(make_pd(1., 3)), + cholesky_input_output_matrices=False, + name="wishart1") + samples1 = chol_w1.sample_n(n_val, seed=123456).eval() + + tf.set_random_seed(654321) + chol_w2 = distributions.WishartCholesky( + df=df, + scale=chol(make_pd(1., 3)), + cholesky_input_output_matrices=False, + name="wishart2") + samples2 = chol_w2.sample_n(n_val, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + def testProb(self): with self.test_session(): # Generate some positive definite (pd) matrices and their Cholesky diff --git a/tensorflow/contrib/distributions/python/ops/beta.py b/tensorflow/contrib/distributions/python/ops/beta.py index 7f77254a644..684d6ec56b2 100644 --- a/tensorflow/contrib/distributions/python/ops/beta.py +++ b/tensorflow/contrib/distributions/python/ops/beta.py @@ -197,7 +197,8 @@ class Beta(distribution.Distribution): gamma1_sample = random_ops.random_gamma( [n,], a, dtype=self.dtype, seed=seed) gamma2_sample = random_ops.random_gamma( - [n,], b, dtype=self.dtype, seed=seed) + [n,], b, dtype=self.dtype, + seed=distribution_util.gen_new_seed(seed, "beta")) beta_sample = gamma1_sample / (gamma1_sample + gamma2_sample) return beta_sample diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py index 9f69d3cb21d..d658e02802c 100644 --- a/tensorflow/contrib/distributions/python/ops/bijector.py +++ b/tensorflow/contrib/distributions/python/ops/bijector.py @@ -12,12 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""An API for reversible (bijective) transformations of random variables.""" +r"""Bijector Ops. + +An API for reversible (bijective) transformations of random variables. + +## Background + +Differentiable, bijective transformations of continuous random variables alter +the calculations made in the cumulative/probability distribution functions and +sample function. This module provides a standard interface for making these +manipulations. + +For more details and examples, see the `Bijector` docstring. + +To apply a `Bijector`, use `distributions.TransformedDistribution`. + +## Bijectors + +@@Bijector +@@Identity +@@Inline +@@Exp +@@ScaleAndShift + +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import abc import contextlib +import six from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape from tensorflow.python.framework import constant_op @@ -26,40 +51,43 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops -class _Bijector(object): - """An interface for transforming a `Distribution` `Tensor`. - - Recall that a `Distribution` `Tensor` has dimensions which have `sample`, - `batch`, and `event` semantics. (See `DistributionShape` for more details.) +@six.add_metaclass(abc.ABCMeta) +class Bijector(object): + """Interface for transforming a `Distribution` via `TransformedDistribution`. A `Bijector` implements a bijective, differentiable function by transforming an input `Tensor`. The output `Tensor` shape is constrained by the input `sample`, `batch`, and `event` shape. 
A `Bijector` is characterized by three operations: - (1) Forward Evaluation - Useful for turning one random outcome into another random outcome from a - different distribution. + 1. Forward Evaluation - (2) Inverse Evaluation - Useful for "reversing" a transformation to compute one probability in - terms of another. + Useful for turning one random outcome into another random outcome from a + different distribution. - (3) (log o det o Jacobian o inverse)(x) - "The log of the determinant of the matrix of all first-order partial - derivatives of the inverse function." - Useful for inverting a transformation to compute one probability in terms - of another. Geometrically, the det(Jacobian) is the volume of the - transformation and is used to scale the probability. + 2. Inverse Evaluation + + Useful for "reversing" a transformation to compute one probability in + terms of another. + + 3. (log o det o Jacobian o inverse)(x) + + "The log of the determinant of the matrix of all first-order partial + derivatives of the inverse function." + Useful for inverting a transformation to compute one probability in terms + of another. Geometrically, the det(Jacobian) is the volume of the + transformation and is used to scale the probability. By convention, transformations of random variables are named in terms of the forward transformation. The forward transformation creates samples, the inverse is useful for computing probabilities. Example Use: - Basic properties: + + - Basic properties: ```python x = ... # A tensor. @@ -69,7 +97,7 @@ class _Bijector(object): x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). ``` - Computing a log-likelihood: + - Computing a log-likelihood: ```python def transformed_log_pdf(bijector, log_pdf, x): @@ -77,7 +105,7 @@ class _Bijector(object): log_pdf(bijector.inverse(x))) ``` - Transforming a random outcome: + - Transforming a random outcome: ```python def transformed_sample(bijector, x): @@ -85,7 +113,8 @@ class _Bijector(object): ``` Example transformations: - "Exponential" + + - "Exponential" ``` Y = g(X) = exp(X) @@ -102,7 +131,7 @@ class _Bijector(object): = (1 / y) Normal(log(y); 0, 1) ``` - "ShiftAndScale" + - "ScaleAndShift" ``` Y = g(X) = sqrtSigma * X + mu @@ -122,7 +151,8 @@ class _Bijector(object): Example of why a `Bijector` needs to understand sample, batch, event partitioning: - Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, + + - Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, and event (S, B, E) shape semantics. Suppose the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. @@ -132,24 +162,25 @@ class _Bijector(object): over the event dimensions. Subclass Requirements: - Subclasses are expected to implement `_forward` and one or both of: + + - Subclasses are expected to implement `_forward` and one or both of: - `_inverse`, `_inverse_log_det_jacobian`, - `_inverse_and_inverse_log_det_jacobian`. - If computation can be shared among `_inverse` and + - If computation can be shared among `_inverse` and `_inverse_log_det_jacobian` it is preferable to implement `_inverse_and_inverse_log_det_jacobian`. This usually reduces graph-construction overhead because a `Distribution`'s implementation of `log_prob` will need to evaluate both the inverse Jacobian as well as the inverse function. 
- If an additional use case needs just `inverse` or just + - If an additional use case needs just `inverse` or just `inverse_log_det_jacobian` then he or she may also wish to implement these functions to avoid computing the `inverse_log_det_jacobian` or the `inverse`, respectively. """ - # TODO(b/30476956): Try to remove constructor dependence on ndims. + @abc.abstractmethod def __init__(self, batch_ndims=None, event_ndims=None, @@ -236,6 +267,9 @@ class _Bijector(object): """Returns the string name of this `Bijector`.""" return self._name + def _forward(self, x): + raise NotImplementedError("forward is not implemented.") + def forward(self, x, name="forward"): """Returns the forward `Bijector` evaluation, i.e., X = g(Y). @@ -249,13 +283,16 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if `_forward` is not implemented. + NotImplementedError: if `_forward` is not implemented. """ with self._name_scope(name, [x]): x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) return self._forward(x) + def _inverse(self, x): + raise NotImplementedError("inverse is not implemented") + def inverse(self, x, name="inverse"): """Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). @@ -269,7 +306,7 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if neither `_inverse` nor + NotImplementedError: if neither `_inverse` nor `_inverse_and_inverse_log_det_jacobian` are implemented. """ with self._name_scope(name, [x]): @@ -277,11 +314,14 @@ class _Bijector(object): self._maybe_assert_dtype(x) try: return self._inverse(x) - except AttributeError: + except NotImplementedError: # Since _inverse was not implemented, try to see if it's implemented # by the _inverse_and_inverse_log_det_jacobian member. return self._inverse_and_inverse_log_det_jacobian(x)[0] + def _inverse_log_det_jacobian(self, x): + raise NotImplementedError("inverse_log_det_jacobian is not implemented") + def inverse_log_det_jacobian(self, x, name="inverse_log_det_jacobian"): """Returns the (log o det o Jacobian o inverse)(x). @@ -300,7 +340,7 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if neither `_inverse_log_det_jacobian` nor + NotImplementedError: if neither `_inverse_log_det_jacobian` nor `_inverse_and_inverse_log_det_jacobian` are implemented. """ with self._name_scope(name, [x]): @@ -308,11 +348,15 @@ class _Bijector(object): self._maybe_assert_dtype(x) try: return self._inverse_log_det_jacobian(x) - except AttributeError: + except NotImplementedError: # Since _inverse_log_det_jacobian was not implemented, try to see if # it's implemented by the _inverse_and_inverse_log_det_jacobian member. return self._inverse_and_inverse_log_det_jacobian(x)[1] + def _inverse_and_inverse_log_det_jacobian(self, x): + raise NotImplementedError( + "inverse_and_inverse_log_det_jacobian is not implemented.") + def inverse_and_inverse_log_det_jacobian( self, x, name="inverse_and_inverse_log_det_jacobian"): """Returns both the inverse evaluation and inverse_log_det_jacobian. @@ -332,15 +376,15 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if neither `_inverse_and_inverse_log_det_jacobian` nor - {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
+ NotImplementedError: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. """ with self._name_scope(name, [x]): x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) try: return self._inverse_and_inverse_log_det_jacobian(x) - except AttributeError: + except NotImplementedError: # Since _inverse_and_inverse_log_det_jacobian was not implemented, try # to see if we can separately use _inverse and # _inverse_log_det_jacobian members. @@ -361,7 +405,7 @@ class _Bijector(object): (self.dtype, x.dtype)) -class _Identity(_Bijector): +class Identity(Bijector): """Bijector which computes Y = g(X) = X. Example Use: @@ -378,7 +422,7 @@ class _Identity(_Bijector): """ def __init__(self, validate_args=False, name="Identity"): - super(_Identity, self).__init__( + super(Identity, self).__init__( batch_ndims=0, event_ndims=0, is_constant_jacobian=True, @@ -396,7 +440,59 @@ class _Identity(_Bijector): return constant_op.constant(0., dtype=x.dtype) -class _Exp(_Bijector): +class Inline(Bijector): + # pylint: disable=line-too-long + """Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian. + + Example Use: + + ```python + exp = Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="Exp") + ``` + + The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. + """ + # pylint: enable=line-too-long + + def __init__(self, forward_fn, inverse_fn, inverse_log_det_jacobian_fn, + is_constant_jacobian=False, name="Inline"): + """Creates a `Bijector` from callables. + + Args: + forward_fn: Python callable implementing the forward transformation. + inverse_fn: Python callable implementing the inverse transformation. + inverse_log_det_jacobian_fn: Python callable implementing the + inverse_log_det_jacobian transformation. + is_constant_jacobian: `Boolean` indicating that the Jacobian is constant + for all input arguments. + name: `String`, name given to ops managed by this object. + """ + super(Inline, self).__init__( + batch_ndims=0, + event_ndims=0, + is_constant_jacobian=is_constant_jacobian, + validate_args=False, + name=name) + self._forward_fn = forward_fn + self._inverse_fn = inverse_fn + self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn + + def _forward(self, x): + return self._forward_fn(x) + + def _inverse(self, y): + return self._inverse_fn(y) + + def _inverse_log_det_jacobian(self, y): + return self._inverse_log_det_jacobian_fn(y) + + +class Exp(Bijector): """Bijector which computes Y = g(X) = exp(X). Example Use: @@ -417,12 +513,11 @@ class _Exp(_Bijector): over the event space. """ - # TODO(b/30476956): Try to remove constructor dependence on ndims. def __init__(self, event_ndims=0, validate_args=False, name="Exp"): - super(_Exp, self).__init__( + super(Exp, self).__init__( batch_ndims=0, event_ndims=event_ndims, validate_args=validate_args, @@ -448,7 +543,7 @@ class _Exp(_Bijector): return y, -math_ops.reduce_sum(y, reduction_indices=event_dims) -class _ShiftAndScale(_Bijector): +class ScaleAndShift(Bijector): """Bijector which computes Y = g(X; loc, scale) = scale * X + loc. Example Use: @@ -457,35 +552,35 @@ class _ShiftAndScale(_Bijector): # No batch, scalar. mu = 0 # shape=[] sigma = 1 # shape=[] - b = ShiftAndScale(loc=mu, scale=sigma) + b = ScaleAndShift(loc=mu, scale=sigma) # b.shaper.batch_ndims == 0 # b.shaper.event_ndims == 0 # One batch, scalar. mu = ... 
# shape=[b], b>0 sigma = ... # shape=[b], b>0 - b = ShiftAndScale(loc=mu, scale=sigma) + b = ScaleAndShift(loc=mu, scale=sigma) # b.shaper.batch_ndims == 1 # b.shaper.event_ndims == 0 # No batch, multivariate. mu = ... # shape=[d], d>0 sigma = ... # shape=[d, d], d>0 - b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) # b.shaper.batch_ndims == 0 # b.shaper.event_ndims == 1 # (B1*B2*...*Bb)-batch, multivariate. mu = ... # shape=[B1,...,Bb, d], b>0, d>0 sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0 - b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) # b.shaper.batch_ndims == b # b.shaper.event_ndims == 1 # Mu is broadcast: mu = 1 sigma = [I, I] # I is a 3x3 identity matrix. - b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) x = numpy.ones(S + sigma.shape) b.forward(x) # == x + 1 ``` @@ -497,7 +592,7 @@ class _ShiftAndScale(_Bijector): scale, event_ndims=0, validate_args=False, - name="ShiftAndScale"): + name="ScaleAndShift"): self._parameters = {} self._name = name with self._name_scope("init", values=[loc, scale, event_ndims]): @@ -512,7 +607,7 @@ class _ShiftAndScale(_Bijector): raise TypeError("%s.dtype=%s does not match %s" % (event_ndims.name, event_ndims.dtype, dtypes.int32)) self._scale, batch_ndims = self._process_scale(self.scale, event_ndims) - super(_ShiftAndScale, self).__init__( + super(ScaleAndShift, self).__init__( batch_ndims=batch_ndims, event_ndims=event_ndims, parameters={"loc": self.loc, "scale": self.scale}, @@ -590,3 +685,77 @@ class _ShiftAndScale(_Bijector): return -math_ops.reduce_sum( math_ops.log(array_ops.matrix_diag_part(self.scale)), reduction_indices=[-1]) + + +class Softplus(Bijector): + """Bijector which computes `Y = g(X) = Log[1 + exp(X)]`. + + The softplus `Bijector` has the following two useful properties: + + * The domain is the positive real numbers + * `softplus(x) approx x`, for large `x`, so it does not overflow as easily as + the `Exp` `Bijector`. + + Example Use: + + ```python + # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 + # batch ndim and 2 event ndims (i.e., vector of matrices). + softplus = Softplus(batch_ndims=1, event_ndims=2) + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + log(1 + exp(x)) == softplus.forward(x) + log(exp(x) - 1) == softplus.inverse(x) + ``` + + Note: log(.) and exp(.) are applied element-wise but the Jacobian is a + reduction over the event space. + """ + + def __init__(self, + event_ndims=0, + validate_args=False, + name="Softplus"): + super(Softplus, self).__init__( + batch_ndims=0, + event_ndims=event_ndims, + validate_args=validate_args, + name=name) + + def _forward(self, x): + return nn_ops.softplus(x) + + def _inverse(self, x): + # The most stable inverse of softplus is not the most direct one. + # y = softplus(x) = Log[1 + exp{x}], (which means y > 0). + # ==> exp{y} = 1 + exp{x} + # ==> x = Log[exp{y} - 1] + # = Log[(exp{y} - 1) / exp{y}] + Log[exp{y}] + # = Log[(1 - exp{-y}) / 1] + Log[exp{y}] + # = Log[1 - exp{-y}] + y + # Recalling y > 0, you see that this is more stable than Log[exp{y} - 1]. + return x + math_ops.log(1. - math_ops.exp(-x)) + + def _inverse_log_det_jacobian(self, x): + # Stable inverse log det jacobian. + # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1] + # ==> dX/dY = exp{Y} / (exp{Y} - 1) + # = 1 / (1 - exp{-Y}), + # which is the most stable for Y > 0. 
+ if self.shaper is None: + raise ValueError("Jacobian cannot be computed with unknown event_ndims") + _, _, event_dims = self.shaper.get_dims(x) + return -math_ops.reduce_sum( + math_ops.log(1. - math_ops.exp(-x)), reduction_indices=event_dims) + + def _inverse_and_inverse_log_det_jacobian(self, x): + if self.shaper is None: + raise ValueError("Jacobian cannot be computed with unknown event_ndims") + _, _, event_dims = self.shaper.get_dims(x) + log_one_minus_exp_neg = math_ops.log(1. - math_ops.exp(-x)) + y = x + log_one_minus_exp_neg + ildj = -math_ops.reduce_sum( + log_one_minus_exp_neg, reduction_indices=event_dims) + return y, ildj diff --git a/tensorflow/contrib/distributions/python/ops/distribution_util.py b/tensorflow/contrib/distributions/python/ops/distribution_util.py index 1838c3a9ea1..bbb7a84f27a 100644 --- a/tensorflow/contrib/distributions/python/ops/distribution_util.py +++ b/tensorflow/contrib/distributions/python/ops/distribution_util.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools +import hashlib import sys import numpy as np @@ -197,8 +198,8 @@ def log_combinations(n, counts, name="log_combinations"): # The sum should be along the last dimension of counts. This is the # "distribution" dimension. Here n a priori represents the sum of counts. with ops.name_scope(name, values=[n, counts]): - n = array_ops.identity(n, name="n") - counts = array_ops.identity(counts, name="counts") + n = ops.convert_to_tensor(n, name="n") + counts = ops.convert_to_tensor(counts, name="counts") total_permutations = math_ops.lgamma(n + 1) counts_factorial = math_ops.lgamma(counts + 1) redundant_permutations = math_ops.reduce_sum(counts_factorial, @@ -397,6 +398,14 @@ def pick_vector(cond, [math_ops.select(cond, n, -1)]) +def gen_new_seed(seed, salt): + """Generate a new seed, from the given seed and salt.""" + if seed: + string = (str(seed) + salt).encode("utf-8") + return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF + return None + + def override_docstring_if_empty(fn, doc_str): """Override the `doc_str` argument to `fn.__doc__`. diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index bd6f920c2cc..9827df6d10b 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import categorical from tensorflow.contrib.distributions.python.ops import distribution +from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util @@ -295,8 +296,10 @@ class Mixture(distribution.Distribution): partitions=cat_samples, num_partitions=self.num_components) samples_class = [None for _ in range(self.num_components)] + for c in range(self.num_components): n_class = array_ops.size(partitioned_samples_indices[c]) + seed = distribution_util.gen_new_seed(seed, "mixture") samples_class_c = self.components[c].sample_n(n_class, seed=seed) # Pull out the correct batch entries from each index. 
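The `gen_new_seed` helper added above, and its use in the `Mixture` sampling hunk that follows it, is the seed-derivation pattern this change threads through the distributions code: hash the caller's seed together with a per-call-site salt so that each internal sampling op gets a distinct but still reproducible stream. A minimal, self-contained sketch of that behavior (the helper body is copied from the `distribution_util.py` hunk above; the printed usage is only illustrative):

```python
import hashlib


def gen_new_seed(seed, salt):
  """Generate a new seed, from the given seed and salt."""
  if seed:
    string = (str(seed) + salt).encode("utf-8")
    return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF
  return None

# Each call site salts the user's seed differently ("mixture", "student_t",
# "wishart", ...), so the auxiliary draws do not reuse the same stream while
# remaining reproducible for a fixed seed.
print(gen_new_seed(42, "mixture"))    # deterministic non-negative 31-bit value
print(gen_new_seed(42, "student_t"))  # different salt -> different derived seed
print(gen_new_seed(None, "wishart"))  # falsy seed (None or 0) -> None, i.e. unseeded
```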
diff --git a/tensorflow/contrib/distributions/python/ops/student_t.py b/tensorflow/contrib/distributions/python/ops/student_t.py index d038100799c..06350482af0 100644 --- a/tensorflow/contrib/distributions/python/ops/student_t.py +++ b/tensorflow/contrib/distributions/python/ops/student_t.py @@ -177,22 +177,17 @@ class StudentT(distribution.Distribution): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - # We use 2 uniform random floats to generate polar random variates. - # http://dl.acm.org/citation.cfm?id=179631 - # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1]. - # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0. - # Let X = R*cos(theta), and let Y = R*sin(theta). - # Then X ~ t_df and Y ~ t_df. - # The variates X and Y are not independent. - shape = array_ops.concat(0, ([2, n], self.batch_shape())) - uniform = random_ops.random_uniform(shape=shape, - dtype=self.dtype, - seed=seed) - samples_g, samples_h = array_ops.unpack(uniform, num=2) - theta = (2. * math.pi) * samples_h - r = math_ops.sqrt(self.df * - (math_ops.pow(samples_g, -2 / self.df) - 1)) - samples = r * math_ops.cos(theta) + # The sampling method comes from the well known fact that if X ~ Normal(0, + # 1), and Z ~ Chi2(df), then X / sqrt(Z / df) ~ StudentT(df). + shape = array_ops.concat(0, ([n], self.batch_shape())) + normal_sample = random_ops.random_normal( + shape, dtype=self.dtype, seed=seed) + half = constant_op.constant(0.5, self.dtype) + df = self.df * array_ops.ones(self.batch_shape(), dtype=self.dtype) + gamma_sample = random_ops.random_gamma( + [n,], half * df, beta=half, dtype=self.dtype, + seed=distribution_util.gen_new_seed(seed, salt="student_t")) + samples = normal_sample / math_ops.sqrt(gamma_sample / df) return samples * self.sigma + self.mu def _log_prob(self, x): diff --git a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py index 20a7a2bf048..f43f1c0421e 100644 --- a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py @@ -26,107 +26,139 @@ from tensorflow.python.ops import math_ops class TransformedDistribution(distribution.Distribution): """A Transformed Distribution. - A Transformed Distribution models `p(y)` given a base distribution `p(x)`, - an invertible transform, `y = f(x)`, and the determinant of the Jacobian of - `f(x)`. + A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and + a deterministic, invertible, differentiable transform, `Y = g(X)`. The + transform is typically an instance of the `Bijector` class and the base + distribution is typically an instance of the `Distribution` class. Shapes, type, and reparameterization are taken from the base distribution. - #### Mathematical details + Write `P(Y=y)` for cumulative density function of random variable (rv) `Y` and + `p` for its derivative wrt to `Y`. Assume that `Y=g(X)` where `g` is + continuous and `X=g^{-1}(Y)`. Write `J` for the Jacobian (of some function). 
- * `p(x)` - probability distribution for random variable X - * `p(y)` - probability distribution for random variable Y - * `f` - transform - * `g` - inverse transform, `g(f(x)) = x` - * `J(x)` - Jacobian of f(x) + A `TransformedDistribution` alters the input/outputs of a `Distribution` + associated with rv `X` in the following ways: - A Transformed Distribution exposes `sample` and `pdf`: + * `sample`: - * `sample`: `y = f(x)`, after drawing a sample of X. - * `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|` + Mathematically: + + ``` + Y = g(X) + ``` + + Programmatically: + + ```python + return bijector.forward(distribution.sample(...)) + ``` + + * `log_prob`: + + Mathematically: + + ``` + (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y) + ``` + + Programmatically: + + ```python + return (bijector.inverse_log_det_jacobian(y) + + distribution.log_prob(bijector.inverse(y))) + ``` + + * `log_cdf`: + + Mathematically: + + ``` + (log o P o g^{-1})(y) + ``` + + Programmatically: + + ```python + return distribution.log_cdf(bijector.inverse(y)) + ``` + + * and similarly for: `cdf`, `prob`, `log_survival_function`, + `survival_function`. A simple example constructing a Log-Normal distribution from a Normal distribution: ```python - logit_normal = TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.sigmoid(x), - inverse=lambda y: tf.log(y) - tf.log(1. - y), - log_det_jacobian=(lambda x: - tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1. - tf.sigmoid(x)), - reduction_indices=[-1]))) - name="LogitNormalTransformedDistribution" - ) + ds = tf.contrib.distributions + log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Exp(), + name="LogNormalTransformedDistribution") + ``` + + A `LogNormal` made from callables: + + ```python + ds = tf.contrib.distributions + log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1))), + name="LogNormalTransformedDistribution") + ``` + + Another example constructing a Normal from a StandardNormal: + + ```python + ds = tf.contrib.distributions + normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=0, sigma=1), + bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0), + name="NormalTransformedDistribution") ``` """ def __init__(self, - base_dist_cls, - transform, - inverse, - log_det_jacobian, - name="TransformedDistribution", - **base_dist_args): + base_distribution, + bijector, + name="TransformedDistribution"): """Construct a Transformed Distribution. Args: - base_dist_cls: the base distribution class to transform. Must be a - subclass of `Distribution`. - transform: a callable that takes a `Tensor` sample from `base_dist` and - returns a `Tensor` of the same shape and type. `x => y`. - inverse: a callable that computes the inverse of transform. `y => x`. If - None, users can only call `log_pdf` on values returned by `sample`. - log_det_jacobian: a callable that takes a `Tensor` sample from `base_dist` - and returns the log of the determinant of the Jacobian of `transform`. + base_distribution: The base distribution to transform. Typically an + instance of `Distribution`. + bijector: The object responsible for calculating the transformation. + Typically an instance of `Bijector`. 
name: The name for the distribution. - **base_dist_args: kwargs to pass on to dist_cls on construction. - - Raises: - TypeError: if `base_dist_cls` is not a subclass of - `Distribution`. """ - with ops.name_scope(name, values=base_dist_args.values()) as ns: - self._base_dist = base_dist_cls(**base_dist_args) - self._transform = transform - self._inverse = inverse - self._log_det_jacobian = log_det_jacobian + with ops.name_scope(name) as ns: + self._base_distribution = base_distribution + self._bijector = bijector self._inverse_cache = {} super(TransformedDistribution, self).__init__( - dtype=self._base_dist.dtype, - parameters={"base_dist_cls": base_dist_cls, - "transform": transform, - "inverse": inverse, - "log_det_jacobian": log_det_jacobian, - "base_dist_args": base_dist_args}, - is_continuous=self._base_dist.is_continuous, - is_reparameterized=self._base_dist.is_reparameterized, - validate_args=self._base_dist.validate_args, - allow_nan_stats=self._base_dist.allow_nan_stats, + dtype=self._base_distribution.dtype, + parameters={"base_distribution": base_distribution, + "bijector": bijector}, + is_continuous=self._base_distribution.is_continuous, + is_reparameterized=self._base_distribution.is_reparameterized, + validate_args=self._base_distribution.validate_args, + allow_nan_stats=self._base_distribution.allow_nan_stats, name=ns) @property def base_distribution(self): """Base distribution, p(x).""" - return self._base_dist + return self._base_distribution @property - def transform(self): + def bijector(self): """Function transforming x => y.""" - return self._transform - - @property - def inverse(self): - """Inverse function of transform, y => x.""" - return self._inverse - - @property - def log_det_jacobian(self): - """Function computing the log determinant of the Jacobian of transform.""" - return self._log_det_jacobian + return self._bijector def _batch_shape(self): return self.base_distribution.batch_shape() @@ -142,29 +174,27 @@ class TransformedDistribution(distribution.Distribution): @distribution_util.AppendDocstring( """Samples from the base distribution and then passes through - the transform.""") + the bijector's forward transform.""") def _sample_n(self, n, seed=None): - samples = self.base_distribution.sample_n(n=n, seed=seed) - with ops.name_scope("transform"): - transformed = self.transform(samples) - self._inverse_cache[transformed] = samples - return transformed + raw_samples = self.base_distribution.sample_n(n=n, seed=seed) + samples = self.bijector.forward(raw_samples) + self._inverse_cache[samples] = raw_samples + return samples @distribution_util.AppendDocstring( - """Implements `(log o p o g)(y) - (log o det o J o g)(y)`, - where `g` is the inverse of `transform`. + """Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, + where `g^{-1}` is the inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`.""") def _log_prob(self, y): x = self._inverse_possibly_from_cache(y) - with ops.name_scope("log_det_jacobian"): - log_det_jacobian = self.log_det_jacobian(x) - return self.base_distribution.log_prob(x) - log_det_jacobian + inverse_log_det_jacobian = self.bijector.inverse_log_det_jacobian(y) + return self.base_distribution.log_prob(x) + inverse_log_det_jacobian @distribution_util.AppendDocstring( - """Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of - `transform`. 
+ """Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the + inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`.""") @@ -172,8 +202,6 @@ class TransformedDistribution(distribution.Distribution): return math_ops.exp(self._log_prob(y)) def _log_cdf(self, y): - # If Y = f(X), - # P[Y <= y] = P[f(X) <= y] = P[X <= f^{-1}(y)] x = self._inverse_possibly_from_cache(y) return self.base_distribution.log_cdf(x) @@ -192,12 +220,7 @@ class TransformedDistribution(distribution.Distribution): def _inverse_possibly_from_cache(self, y): """Return `self._inverse(y)`, possibly using cached value.""" y = ops.convert_to_tensor(y, name="y") - with ops.name_scope("inverse"): - if y in self._inverse_cache: - x = self._inverse_cache[y] - elif self.inverse: - x = self.inverse(y) - else: - raise ValueError("No inverse function exists and input `y` was not " - "returned from `sample`.") - return x + if y in self._inverse_cache: + return self._inverse_cache[y] + else: + return self.bijector.inverse(y) diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index 92d952a1d24..29fa19d6fd1 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -22,6 +22,7 @@ import math import numpy as np from tensorflow.contrib.distributions.python.ops import distribution +from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky from tensorflow.contrib.distributions.python.ops import operator_pd_full from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util @@ -211,7 +212,8 @@ class _WishartOperatorPD(distribution.Distribution): 0.5 * self.df, self.dimension), beta=0.5, dtype=self.dtype, - seed=seed) + seed=distribution_util.gen_new_seed( + seed, "wishart")) # Complexity: O(nbk^2) x = array_ops.matrix_band_part(x, -1, 0) # Tri-lower. diff --git a/tensorflow/contrib/framework/python/framework/decorator_utils.py b/tensorflow/contrib/framework/python/framework/decorator_utils.py index e8d6dbe249e..155003498ce 100644 --- a/tensorflow/contrib/framework/python/framework/decorator_utils.py +++ b/tensorflow/contrib/framework/python/framework/decorator_utils.py @@ -56,6 +56,7 @@ def add_notice_to_docstring( def validate_callable(func, decorator_name): if not hasattr(func, '__call__'): raise ValueError( - '%s is not a function. If this is a property, ' - 'apply @%s before @property:\n\n@property\n@%s\ndef method(...)' % ( + '%s is not a function. 
If this is a property, make sure' + ' @property appears before @%s in your source code:' + '\n\n@property\n@%s\ndef method(...)' % ( func, decorator_name, decorator_name)) diff --git a/tensorflow/contrib/framework/python/framework/deprecation_test.py b/tensorflow/contrib/framework/python/framework/deprecation_test.py index 409758b8531..c5422f47316 100644 --- a/tensorflow/contrib/framework/python/framework/deprecation_test.py +++ b/tensorflow/contrib/framework/python/framework/deprecation_test.py @@ -245,11 +245,10 @@ class DeprecationTest(tf.test.TestCase): self.assertRegexpMatches(args[0], r"deprecated and will be removed after") self._assert_subset(set([date, instructions]), set(args[1:])) - @tf.test.mock.patch.object(logging, "warning", autospec=True) - def test_prop_wrong_order(self, mock_warning): - + def test_prop_wrong_order(self): with self.assertRaisesRegexp( - ValueError, "apply @deprecated before @property"): + ValueError, + "make sure @property appears before @deprecated in your source code"): # pylint: disable=unused-variable class _Object(object): @@ -357,8 +356,7 @@ class DeprecatedArgsTest(tf.test.TestCase): with self.assertRaisesRegexp(ValueError, "argument"): deprecation.deprecated_args(date, instructions) - @tf.test.mock.patch.object(logging, "warning", autospec=True) - def test_deprecated_missing_args(self, mock_warning): + def test_deprecated_missing_args(self): date = "2016-07-04" instructions = "This is how you update..." diff --git a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc index a81ccff4310..1f1f1a8ca25 100644 --- a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc +++ b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc @@ -68,6 +68,7 @@ class SparseTensorColumn : public ColumnInterface { return feature_counts_[batch]; } + // InternalType is int64 only when using HashCrosser. int64 DoFeature(int64 batch, int64 n, int64 not_used) const { const int64 start = feature_start_indices_[batch]; if (DT_STRING == values_.dtype()) @@ -75,6 +76,7 @@ class SparseTensorColumn : public ColumnInterface { return values_.vec().data()[start + n]; } + // InternalType is string or StringPiece when using StringCrosser. string DoFeature(int64 batch, int64 n, string not_used) const { const int64 start = feature_start_indices_[batch]; if (DT_STRING == values_.dtype()) @@ -103,12 +105,14 @@ class DenseTensorColumn : public ColumnInterface { int64 FeatureCount(int64 batch) const override { return tensor_.dim_size(1); } + // InternalType is int64 only when using HashCrosser. int64 DoFeature(int64 batch, int64 n, int64 not_used) const { if (DT_STRING == tensor_.dtype()) return Fingerprint64(tensor_.matrix()(batch, n)); return tensor_.matrix()(batch, n); } + // Internal type is string or StringPiece when using StringCrosser. 
string DoFeature(int64 batch, int64 n, string not_used) const { if (DT_STRING == tensor_.dtype()) return tensor_.matrix()(batch, n); return std::to_string(tensor_.matrix()(batch, n)); @@ -158,7 +162,7 @@ class StringCrosser { public: StringCrosser(const std::vector< std::unique_ptr>>& columns, - const int64 not_used) + const int64 num_buckets_unused, const uint64 hash_key_unused) : columns_(columns) {} string Generate(const int64 batch_index, @@ -178,32 +182,62 @@ class StringCrosser { const std::vector>>& columns_; }; -// Seed is chosen based on third_party/tensorflow/core/lib/hash/hash.h -const int64 kInitialHashSeed = 0xDECAFCAFFE; - -int64 HashCombine(int64 a, int64 b) { - return a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)); -} - // Generates the sparse crosses as nested hash to avoid string manipulations. class HashCrosser { public: HashCrosser( const std::vector>>& columns, - const int64 num_buckets) + const int64 num_buckets, const uint64 hash_key_unused) : columns_(columns), num_buckets_(num_buckets) {} int64 Generate(const int64 batch_index, const std::vector& permutation) const { + // Seed is chosen based on third_party/tensorflow/core/lib/hash/hash.h + static const int64 kInitialHashSeed = 0xDECAFCAFFE; + uint64 hashed_output = kInitialHashSeed; - for (int i = 0; i < permutation.size(); i++) { + for (size_t i = 0; i < permutation.size(); ++i) { int64 hash_i = columns_[i]->Feature(batch_index, permutation[i]); hashed_output = HashCombine(hashed_output, hash_i); } if (num_buckets_ > 0) { return hashed_output % num_buckets_; } else { - // To perevent negative output we take module to max int64. + // To prevent negative output we take modulo to max int64. + return hashed_output % std::numeric_limits::max(); + } + } + + private: + static int64 HashCombine(int64 a, int64 b) { + return a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)); + } + + const std::vector>>& columns_; + const int64 num_buckets_; +}; + +// Generates the sparse crosses as nested hash to avoid string manipulations. +class HashCrosserV2 { + public: + HashCrosserV2( + const std::vector>>& columns, + const int64 num_buckets, const uint64 hash_key) + : columns_(columns), num_buckets_(num_buckets), hash_key_(hash_key) {} + + int64 Generate(const int64 batch_index, + const std::vector& permutation) const { + // Do the fingerprint concatenation on uint64. + uint64 hashed_output = hash_key_; + for (size_t i = 0; i < permutation.size(); ++i) { + uint64 hash_i = columns_[i]->Feature(batch_index, permutation[i]); + hashed_output = FingerprintCat64(hashed_output, hash_i); + } + // The return value is int64 based on the number of buckets. + if (num_buckets_ > 0) { + return hashed_output % num_buckets_; + } else { + // To prevent negative output we take modulo to max int64. return hashed_output % std::numeric_limits::max(); } } @@ -211,6 +245,7 @@ class HashCrosser { private: const std::vector>>& columns_; const int64 num_buckets_; + const uint64 hash_key_; }; // ProductIterator generates cartesian products based on indices. 
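Why the kernel grows a keyed `HashCrosserV2` alongside the legacy `HashCrosser` is easiest to see by replaying the old combine step outside TensorFlow. The sketch below re-implements `HashCombine` with plain unsigned 64-bit arithmetic, which is a simplification (the kernel mixes signed `int64` and `uint64`, so exact hash values can differ); it only demonstrates the structural weakness that the V2 crosser and its `hash_key` are meant to remove, and that the collision test added later in this change pins down:

```python
MASK64 = (1 << 64) - 1
SEED = 0xDECAFCAFFE          # kInitialHashSeed from the kernel above
MAGIC = 0x9e3779b97f4a7800   # additive constant used by HashCombine


def hash_combine(a, b):
  # Unsigned re-implementation of the legacy HashCombine; illustrative only.
  return (a ^ ((b + MAGIC + ((a << 10) & MASK64) + (a >> 4)) & MASK64)) & MASK64


def legacy_cross(values, num_buckets):
  h = SEED
  for v in values:
    h = hash_combine(h, v)
  return h % num_buckets

# Two rows whose last feature differs by exactly num_buckets (359 vs. 359 + 1024)
# land in the same bucket for every value of the other feature: with the rest of
# the chain fixed, the low 10 bits of the combined hash depend only on the low
# 10 bits of the final value, so reducing modulo 1024 cannot separate them.
for x in range(10):
  assert legacy_cross([x, 359], 1024) == legacy_cross([x, 359 + 1024], 1024)

# HashCrosserV2 instead threads a caller-supplied hash_key through FingerprintCat64,
# which remixes all 64 bits at every step and avoids this kind of collision.
```

At the Python level the key is exposed through the new optional `hash_key` argument of `sparse_feature_cross` later in this change, with `SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY` as the value used by the new tests.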
@@ -262,28 +297,41 @@ class ProductIterator { std::vector next_permutation_; }; -template +template struct CrossTraits; -template -struct CrossTraits { +template +struct CrossTraits { typedef StringCrosser Crosser; typedef OutputUpdater Updater; }; template <> -struct CrossTraits { +struct CrossTraits { typedef HashCrosser Crosser; typedef OutputUpdater Updater; }; + +template <> +struct CrossTraits { + typedef HashCrosserV2 Crosser; + typedef OutputUpdater Updater; +}; } // namespace -template +template class SparseFeatureCrossOp : public OpKernel { public: explicit SparseFeatureCrossOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("num_buckets", &num_buckets_)); + if (VERSION_2) { + // Read signed_hash_key_ as int64 since uint64 attributes are not + // supported by REGISTER_OP. + int64 signed_hash_key_; + OP_REQUIRES_OK(context, context->GetAttr("hash_key", &signed_hash_key_)); + hash_key_ = static_cast(signed_hash_key_); + } } void Compute(OpKernelContext* context) override { @@ -303,8 +351,8 @@ class SparseFeatureCrossOp : public OpKernel { GenerateColumnsFromInput(indices_list_in, values_list_in, shapes_list_in, dense_list_in); - typename CrossTraits::Crosser crosser( - columns, num_buckets_); + typename CrossTraits::Crosser + crosser(columns, num_buckets_, hash_key_); Tensor* indices_out; Tensor* values_out; Tensor* shape_out; @@ -313,8 +361,8 @@ class SparseFeatureCrossOp : public OpKernel { CreateOutputTensors(columns, batch_size, context, &indices_out, &values_out, &shape_out, &output_start_indices); - typename CrossTraits::Updater updater( - output_start_indices, indices_out, values_out); + typename CrossTraits::Updater + updater(output_start_indices, indices_out, values_out); auto do_work = [this, &columns, crosser, updater](int64 begin, int64 end) { for (int b = begin; b < end; b++) { ProductIterator product_iterator(columns, b); @@ -459,7 +507,7 @@ class SparseFeatureCrossOp : public OpKernel { return columns; } - // Extrats data about the features and populates feature data. + // Extracts data about the features and populates feature data. void ExtractFeatureData( const OpInputList& indices_list_in, int64 batch_size, std::vector>* feature_counts, @@ -536,30 +584,57 @@ class SparseFeatureCrossOp : public OpKernel { return cross_count; } int64 num_buckets_; + uint64 hash_key_; }; REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); + +// The following builders enable FingerprintCat64 concatenation for the +// crosses features. 
+REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); + +REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); + +REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); + +REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); } // namespace tensorflow diff --git a/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc b/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc index e854292f9da..f73ea5e2c9e 100644 --- a/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc +++ b/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc @@ -68,9 +68,87 @@ then the output will be if hashed_output=true then the output will be shape = [2, 2] - [0, 0]: Hash64("f", Hash64("d", Hash64("a"))) - [1, 0]: Hash64("g", Hash64("e", Hash64("b"))) - [1, 1]: Hash64("g", Hash64("e", Hash64("c"))) + [0, 0]: HashCombine( + Fingerprint64("f"), HashCombine( + Fingerprint64("d"), Fingerprint64("a"))) + [1, 0]: HashCombine( + Fingerprint64("g"), HashCombine( + Fingerprint64("e"), Fingerprint64("b"))) + [1, 1]: HashCombine( + Fingerprint64("g"), HashCombine( + Fingerprint64("e"), Fingerprint64("c"))) + +indices: 2-D. Indices of each input `SparseTensor`. +values: 1-D. values of each `SparseTensor`. +shapes: 1-D. Shapes of each `SparseTensor`. +dense: 2-D. Columns represented by dense `Tensor`. +output_indices: 2-D. Indices of the concatenated `SparseTensor`. +output_values: 1-D. Non-empty values of the concatenated or hashed + `SparseTensor`. +output_shape: 1-D. Shape of the concatenated `SparseTensor`. +)doc"); + +REGISTER_OP("SparseFeatureCrossV2") + .Input("indices: N * int64") + .Input("values: sparse_types") + .Input("shapes: N * int64") + .Input("dense: dense_types") + .Output("output_indices: int64") + .Output("output_values: out_type") + .Output("output_shape: int64") + .Attr("N: int >= 0") + .Attr("hashed_output: bool") + .Attr("num_buckets: int >= 0") + .Attr("hash_key: int") + .Attr("sparse_types: list({int64, string}) >= 0") + .Attr("dense_types: list({int64, string}) >= 0") + .Attr("out_type: {int64, string}") + .Attr("internal_type: {int64, string}") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Matrix(c->UnknownDim(), 2)); + c->set_output(1, c->Vector(c->UnknownDim())); + c->set_output(2, c->Vector(2)); + return Status::OK(); + }) + .Doc(R"doc( +Generates sparse cross form a list of sparse tensors. + +The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each +representing features of one feature column. It outputs a 2D `SparseTensor` with +the batchwise crosses of these features. 
+ +For example, if the inputs are + + inputs[0]: SparseTensor with shape = [2, 2] + [0, 0]: "a" + [1, 0]: "b" + [1, 1]: "c" + + inputs[1]: SparseTensor with shape = [2, 1] + [0, 0]: "d" + [1, 0]: "e" + + inputs[2]: Tensor [["f"], ["g"]] + +then the output will be + + shape = [2, 2] + [0, 0]: "a_X_d_X_f" + [1, 0]: "b_X_e_X_g" + [1, 1]: "c_X_e_X_g" + +if hashed_output=true then the output will be + + shape = [2, 2] + [0, 0]: FingerprintCat64( + Fingerprint64("f"), FingerprintCat64( + Fingerprint64("d"), Fingerprint64("a"))) + [1, 0]: FingerprintCat64( + Fingerprint64("g"), FingerprintCat64( + Fingerprint64("e"), Fingerprint64("b"))) + [1, 1]: FingerprintCat64( + Fingerprint64("g"), FingerprintCat64( + Fingerprint64("e"), Fingerprint64("c"))) indices: 2-D. Indices of each input `SparseTensor`. values: 1-D. values of each `SparseTensor`. diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py index 1d39435ded9..3bdfc3e81bd 100644 --- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py +++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy import tensorflow as tf @@ -253,10 +254,13 @@ class SparseCrossOpTest(tf.test.TestCase): Cross for the corresponding batch should be empty. """ op = tf.contrib.layers.sparse_feature_cross([ - self._sparse_tensor( - [['batch1-FC1-F1', 'batch1-FC1-F2']], 2), self._sparse_tensor( - [['batch1-FC2-F1'], ['batch2-FC2-F1']], 2), self._sparse_tensor( - [['batch1-FC3-F1', 'batch1-FC3-F2']], 2) + self._sparse_tensor([ + ['batch1-FC1-F1', 'batch1-FC1-F2'] + ], 2), self._sparse_tensor([ + ['batch1-FC2-F1'], ['batch2-FC2-F1'] + ], 2), self._sparse_tensor([ + ['batch1-FC3-F1', 'batch1-FC3-F2'] + ], 2) ]) expected_out = self._sparse_tensor([[ 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1', @@ -298,6 +302,26 @@ class SparseCrossOpTest(tf.test.TestCase): with self.test_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + def test_hashed_output_zero_bucket_v2(self): + """Tests a simple scenario. + """ + op = tf.contrib.layers.sparse_feature_cross( + [ + self._sparse_tensor([ + ['batch1-FC1-F1'] + ]), self._sparse_tensor([ + ['batch1-FC2-F1'] + ]), self._sparse_tensor([ + ['batch1-FC3-F1'] + ]) + ], + hashed_output=True, + hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) + # Check actual hashed output to prevent unintentional hashing changes. + expected_out = self._sparse_tensor([[1971693436396284976]]) + with self.test_session() as sess: + self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed. def test_hashed_output(self): """Tests a simple scenario. @@ -319,6 +343,56 @@ class SparseCrossOpTest(tf.test.TestCase): with self.test_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + def test_hashed_output_v2(self): + """Tests a simple scenario. 
+ """ + op = tf.contrib.layers.sparse_feature_cross( + [ + self._sparse_tensor([ + ['batch1-FC1-F1'] + ]), self._sparse_tensor([ + ['batch1-FC2-F1'] + ]), self._sparse_tensor([ + ['batch1-FC3-F1'] + ]) + ], + hashed_output=True, + num_buckets=100, + hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) + # Check actual hashed output to prevent unintentional hashing changes. + expected_out = self._sparse_tensor([[83]]) + with self.test_session() as sess: + self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + + def test_hashed_output_v1_has_collision(self): + """Tests the old version of the fingerprint concatenation has collisions. + """ + # The last 10 bits of 359 and 1024+359 are identical. + # As a result, all the crosses collide. + t1 = tf.constant([[359], [359 + 1024]]) + t2 = tf.constant([list(range(10)), list(range(10))]) + cross = tf.contrib.layers.sparse_feature_cross( + [t2, t1], hashed_output=True, num_buckets=1024) + cross_dense = tf.sparse_tensor_to_dense(cross) + with tf.Session(): + values = cross_dense.eval() + self.assertTrue(numpy.equal(values[0], values[1]).all()) + + def test_hashed_output_v2_has_no_collision(self): + """Tests the new version of the fingerprint concatenation has no collisions. + """ + # Although the last 10 bits of 359 and 1024+359 are identical. + # As a result, all the crosses shouldn't collide. + t1 = tf.constant([[359], [359 + 1024]]) + t2 = tf.constant([list(range(10)), list(range(10))]) + cross = tf.contrib.layers.sparse_feature_cross( + [t2, t1], hashed_output=True, num_buckets=1024, + hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) + cross_dense = tf.sparse_tensor_to_dense(cross) + with tf.Session(): + values = cross_dense.eval() + self.assertTrue(numpy.not_equal(values[0], values[1]).all()) + def test_hashed_3x1x2(self): """Tests 3x1x2 permutation with hashed output. """ diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index f81c20bdc76..6515f52ac33 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -170,7 +170,8 @@ def _prune_invalid_ids(sparse_ids, sparse_weights): return sparse_ids, sparse_weights -def hashed_embedding_lookup(params, values, dimension, name=None): +def hashed_embedding_lookup(params, values, dimension, name=None, + hash_key=None): """Looks up embeddings using parameter hashing for each value in `values`. The i-th embedding component of a value v in `values` is found by retrieving @@ -200,6 +201,9 @@ def hashed_embedding_lookup(params, values, dimension, name=None): values: `Tensor` of values to be embedded. dimension: Embedding dimension name: An optional name for this op. + hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp + (optional). 
Returns: A tensor with shape [d0, ..., dn, dimension] @@ -243,7 +247,8 @@ def hashed_embedding_lookup(params, values, dimension, name=None): tensors_to_cross = [array_ops.tile(array_ops.expand_dims( math_ops.range(0, dimension), 0), array_ops.shape(values)), values] ids = sparse_feature_cross_op.sparse_feature_cross( - tensors_to_cross, hashed_output=True, num_buckets=num_params) + tensors_to_cross, hashed_output=True, num_buckets=num_params, + hash_key=hash_key) ids = sparse_ops.sparse_tensor_to_dense(ids) # No need to validate the indices since we have checked the params @@ -260,7 +265,8 @@ def hashed_embedding_lookup_sparse(params, dimension, combiner=None, default_value=None, - name=None): + name=None, + hash_key=None): """Looks up embeddings of a sparse feature using parameter hashing. See `tf.contrib.layers.hashed_embedding_lookup` for embedding with hashing. @@ -276,6 +282,9 @@ def hashed_embedding_lookup_sparse(params, the default. default_value: The value to use for an entry with no features. name: An optional name for this op. + hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp + (optional). Returns: Dense tensor with shape [N, dimension] with N the number of rows in @@ -315,7 +324,8 @@ def hashed_embedding_lookup_sparse(params, values = sparse_values.values values, idx = array_ops.unique(values) - embeddings = hashed_embedding_lookup(params, values, dimension) + embeddings = hashed_embedding_lookup(params, values, dimension, + hash_key=hash_key) if combiner == "sum": embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids, diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 23b54870a2d..d75eba31db9 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -1476,6 +1476,7 @@ def bucketized_column(source_column, boundaries): class _CrossedColumn(_FeatureColumn, collections.namedtuple("_CrossedColumn", ["columns", "hash_bucket_size", + "hash_key", "combiner", "ckpt_to_load_from", "tensor_name_in_ckpt"])): """Represents a cross transformation also known as conjuction or combination. @@ -1536,6 +1537,7 @@ class _CrossedColumn(_FeatureColumn, def __new__(cls, columns, hash_bucket_size, + hash_key, combiner="sqrtn", ckpt_to_load_from=None, tensor_name_in_ckpt=None): @@ -1560,7 +1562,8 @@ class _CrossedColumn(_FeatureColumn, sorted_columns = sorted( [column for column in columns], key=lambda column: column.name) return super(_CrossedColumn, cls).__new__(cls, tuple(sorted_columns), - hash_bucket_size, combiner, + hash_bucket_size, hash_key, + combiner, ckpt_to_load_from, tensor_name_in_ckpt) @@ -1623,6 +1626,7 @@ class _CrossedColumn(_FeatureColumn, feature_tensors, hashed_output=True, num_buckets=self.hash_bucket_size, + hash_key=self.hash_key, name="cross") # pylint: disable=unused-argument @@ -1650,7 +1654,8 @@ class _CrossedColumn(_FeatureColumn, def crossed_column(columns, hash_bucket_size, combiner=None, ckpt_to_load_from=None, - tensor_name_in_ckpt=None): + tensor_name_in_ckpt=None, + hash_key=None): """Creates a _CrossedColumn. Args: @@ -1664,6 +1669,9 @@ def crossed_column(columns, hash_bucket_size, combiner=None, tensor_name_in_ckpt: (Optional). Name of the `Tensor` in the provided checkpoint from which to restore the column weights. Required if `ckpt_to_load_from` is not None. 
+ hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp + (optional). Returns: A _CrossedColumn. @@ -1682,6 +1690,7 @@ def crossed_column(columns, hash_bucket_size, combiner=None, return _CrossedColumn( columns, hash_bucket_size, + hash_key, combiner=combiner, ckpt_to_load_from=ckpt_to_load_from, tensor_name_in_ckpt=tensor_name_in_ckpt) diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py index 623c6093bc3..4000ce88850 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py @@ -128,7 +128,6 @@ def _embeddings_from_arguments(column, embeddings, input_tensor, sparse_weights=weight_tensor, - default_id=0, combiner=args.combiner, name=column.name + 'weights') @@ -214,10 +213,8 @@ def input_from_feature_columns(columns_to_tensors, age_buckets = bucketized_column( source_column=age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) - occupation_x_age = crossed_column(columns=[occupation, age_buckets], - hash_bucket_size=10000) - feature_columns=[occupation_emb, occupation_x_age] + feature_columns=[occupation_emb, age_buckets] Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key @@ -328,7 +325,6 @@ def _create_embedding_lookup(column, variable, embedding_lookup_arguments.input_tensor, sparse_weights=embedding_lookup_arguments.weight_tensor, - default_id=0, combiner=embedding_lookup_arguments.combiner, name=column.name + '_weights') return variable, predictions @@ -387,7 +383,6 @@ def _create_joint_embedding_lookup(columns_to_tensors, variable, sparse_tensor, sparse_weights=None, - default_id=0, combiner='sum', name='_weights') return variable, predictions @@ -488,8 +483,6 @@ def weighted_sum_from_feature_columns(columns_to_tensors, occupation = sparse_column_with_hash_bucket(column_name="occupation", hash_bucket_size=1000) - occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16, - combiner="sum") age = real_valued_column("age") age_buckets = bucketized_column( source_column=age, @@ -497,7 +490,7 @@ def weighted_sum_from_feature_columns(columns_to_tensors, occupation_x_age = crossed_column(columns=[occupation, age_buckets], hash_bucket_size=10000) - feature_columns=[occupation_emb, occupation_x_age] + feature_columns=[age_buckets, occupation, occupation_x_age] Args: columns_to_tensors: A mapping from feature column to tensors. 
'string' key diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py index 0bcbcba5e30..dd3bd8fcb0b 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py @@ -644,7 +644,7 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase): hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10) wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"], indices=[[0, 0], [1, 0], [1, 1]], - shape=[2, 2]) + shape=[3, 2]) features = {"wire": wire_tensor} embeded_sparse = tf.contrib.layers.embedding_column( hashed_sparse, 1, combiner="sum", initializer=init_ops.ones_initializer) @@ -653,18 +653,18 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase): with self.test_session(): tf.initialize_all_variables().run() # score: (number of values) - self.assertAllEqual(output.eval(), [[1.], [2.]]) + self.assertAllEqual(output.eval(), [[1.], [2.], [0.]]) def testEmbeddingColumnWithWeightedSparseColumnForDNN(self): ids = tf.contrib.layers.sparse_column_with_keys( "ids", ["marlo", "omar", "stringer"]) ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"], indices=[[0, 0], [1, 0], [1, 1]], - shape=[2, 2]) + shape=[3, 2]) weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights") weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0], indices=[[0, 0], [1, 0], [1, 1]], - shape=[2, 2]) + shape=[3, 2]) features = {"ids": ids_tensor, "weights": weights_tensor} embeded_sparse = tf.contrib.layers.embedding_column( @@ -675,7 +675,7 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase): tf.initialize_all_variables().run() tf.initialize_all_tables().run() # score: (sum of weights) - self.assertAllEqual(output.eval(), [[10.], [50.]]) + self.assertAllEqual(output.eval(), [[10.], [50.], [0.]]) def testInputLayerWithCollectionsForDNN(self): real_valued = tf.contrib.layers.real_valued_column("price") @@ -960,7 +960,7 @@ class SequenceInputFromFeatureColumnTest(tf.test.TestCase): # `ids_tensor` consists of 7 instances of , 3 occurences of "b", # 2 occurences of "c" and 1 instance of "a". - expected_gradient_values = sorted([7., 3., 2., 1.] * embedding_dimension) + expected_gradient_values = sorted([0., 3., 2., 1.] * embedding_dimension) actual_gradient_values = np.sort(gradients[0].values, axis=None) self.assertAllClose(expected_gradient_values, actual_gradient_values) diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index 711510f32cf..0667fee32a5 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -22,6 +22,7 @@ import six from tensorflow.contrib import losses from tensorflow.contrib import metrics as metrics_lib +from tensorflow.contrib.framework import deprecated from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -30,6 +31,11 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." 
+ "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def regression_target(label_name=None, weight_column_name=None, target_dimension=1): @@ -54,6 +60,11 @@ def regression_target(label_name=None, # TODO(zakaria): Add logistic_regression_target +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def multi_class_target(n_classes, label_name=None, weight_column_name=None): """Creates a _TargetColumn for multi class single label classification. @@ -85,6 +96,11 @@ def multi_class_target(n_classes, label_name=None, weight_column_name=None): weight_column_name=weight_column_name) +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def binary_svm_target(label_name=None, weight_column_name=None): """Creates a _TargetColumn for binary classification with SVMs. @@ -105,6 +121,11 @@ def binary_svm_target(label_name=None, weight_column_name=None): weight_column_name=weight_column_name) +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") class ProblemType(object): UNSPECIFIED = 0 CLASSIFICATION = 1 @@ -391,7 +412,6 @@ def _log_loss_with_two_classes(logits, target): def _softmax_cross_entropy_loss(logits, target): - # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target. # Check that we got int32/int64 for classification. if (not target.dtype.is_compatible_with(dtypes.int64) and not target.dtype.is_compatible_with(dtypes.int32)): @@ -416,6 +436,11 @@ def _run_metrics(predictions, targets, metrics, weights): return result +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def get_default_binary_metrics_for_eval(thresholds): """Returns a dictionary of basic metrics for logistic regression. diff --git a/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py b/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py index a3cbd2a35ab..560598024a7 100644 --- a/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py +++ b/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + +from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.python.framework import common_shapes from tensorflow.python.framework import dtypes from tensorflow.python.framework import load_library @@ -28,9 +30,21 @@ _sparse_feature_cross_op = load_library.load_op_library( resource_loader.get_path_to_datafile("_sparse_feature_cross_op.so")) assert _sparse_feature_cross_op, "Could not load _sparse_feature_cross_op.so." +# Default hash key for the FingerprintCat64. +SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY = 0xDECAFCAFFE + +@deprecated_arg_values( + "2016-11-20", + "The default behavior of sparse_feature_cross is changing, the default\n" + "value for hash_key will change to SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY.\n" + "From that point on sparse_feature_cross will always use FingerprintCat64\n" + "to concatenate the feature fingerprints. 
And the underlying\n" + "_sparse_feature_cross_op.sparse_feature_cross operation will be marked\n" + "as deprecated.", + hash_key=None) def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0, - name=None): + name=None, hash_key=None): """Crosses a list of Tensor or SparseTensor objects. See sparse_feature_cross_kernel.cc for more details. @@ -42,6 +56,10 @@ def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0, num_buckets: It is used if hashed_output is true. output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. name: A name prefix for the returned tensors (optional). + hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp. + The default value is None, but will become + SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY after 2016-11-20 (optional). Returns: A `SparseTensor` with the crossed features. @@ -74,18 +92,36 @@ def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0, dense_inputs[i] = math_ops.to_int64(dense_inputs[i]) internal_type = dtypes.int64 - indices_out, values_out, shape_out = ( - _sparse_feature_cross_op.sparse_feature_cross(indices, - values, - shapes, - dense_inputs, - hashed_output, - num_buckets, - out_type=out_type, - internal_type=internal_type, - name=name)) + if hash_key: + indices_out, values_out, shape_out = ( + _sparse_feature_cross_op.sparse_feature_cross_v2( + indices, + values, + shapes, + dense_inputs, + hashed_output, + num_buckets, + hash_key=hash_key, + out_type=out_type, + internal_type=internal_type, + name=name)) + else: + indices_out, values_out, shape_out = ( + _sparse_feature_cross_op.sparse_feature_cross( + indices, + values, + shapes, + dense_inputs, + hashed_output, + num_buckets, + out_type=out_type, + internal_type=internal_type, + name=name)) + return ops.SparseTensor(indices_out, values_out, shape_out) ops.RegisterShape("SparseFeatureCross")(common_shapes.call_cpp_shape_fn) ops.NotDifferentiable("SparseFeatureCross") +ops.RegisterShape("SparseFeatureCrossV2")(common_shapes.call_cpp_shape_fn) +ops.NotDifferentiable("SparseFeatureCrossV2") diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 40e014a4409..b6d83884f4d 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -27,19 +27,7 @@ py_library( py_test( name = "base_test", size = "medium", - srcs = ["python/learn/tests/base_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":learn", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - -py_test( - name = "load_csv_test", - size = "small", - srcs = ["python/learn/tests/load_csv_test.py"], + srcs = ["python/learn/estimators/base_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -51,7 +39,7 @@ py_test( py_test( name = "data_feeder_test", size = "small", - srcs = ["python/learn/tests/data_feeder_test.py"], + srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -274,7 +262,7 @@ py_test( py_test( name = "estimators_test", size = "small", - srcs = ["python/learn/tests/estimators_test.py"], + srcs = ["python/learn/estimators/estimators_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -286,7 +274,7 @@ py_test( py_test( name = "metric_spec_test", size = "small", - srcs = ["python/learn/tests/metric_spec_test.py"], + srcs = ["python/learn/metric_spec_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -298,7 +286,7 @@ py_test( py_test( name = 
"experiment_test", size = "small", - srcs = ["python/learn/tests/experiment_test.py"], + srcs = ["python/learn/experiment_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -310,7 +298,7 @@ py_test( py_test( name = "graph_actions_test", size = "small", - srcs = ["python/learn/tests/graph_actions_test.py"], + srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -322,7 +310,7 @@ py_test( py_test( name = "learn_runner_test", size = "small", - srcs = ["python/learn/tests/learn_runner_test.py"], + srcs = ["python/learn/learn_runner_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -334,7 +322,7 @@ py_test( py_test( name = "monitors_test", size = "small", - srcs = ["python/learn/tests/monitors_test.py"], + srcs = ["python/learn/monitors_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -347,7 +335,7 @@ py_test( name = "run_config_test", size = "small", srcs = [ - "python/learn/tests/run_config_test.py", + "python/learn/estimators/run_config_test.py", ], srcs_version = "PY2AND3", deps = [ @@ -356,18 +344,6 @@ py_test( ], ) -py_test( - name = "basic_session_run_hooks_test", - size = "small", - srcs = ["python/learn/tests/basic_session_run_hooks_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":learn", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - py_test( name = "tensor_signature_test", srcs = ["python/learn/estimators/tensor_signature_test.py"], @@ -430,15 +406,10 @@ py_test( ) py_test( - name = "dnn_test", - size = "medium", - srcs = ["python/learn/estimators/dnn_test.py"], - shard_count = 4, + name = "head_test", + size = "small", + srcs = ["python/learn/estimators/head_test.py"], srcs_version = "PY2AND3", - tags = [ - "manual", # http://b/31934515 - "notap", - ], deps = [ ":learn", "//tensorflow:tensorflow_py", @@ -447,9 +418,10 @@ py_test( ) py_test( - name = "dnn_sampled_softmax_classifier_test", - size = "large", - srcs = ["python/learn/estimators/dnn_sampled_softmax_classifier_test.py"], + name = "dnn_test", + size = "medium", + srcs = ["python/learn/estimators/dnn_test.py"], + shard_count = 4, srcs_version = "PY2AND3", tags = [ "manual", # http://b/31934515 @@ -538,7 +510,7 @@ py_test( py_test( name = "grid_search_test", size = "small", - srcs = ["python/learn/tests/grid_search_test.py"], + srcs = ["python/learn/grid_search_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -550,7 +522,7 @@ py_test( py_test( name = "io_test", size = "small", - srcs = ["python/learn/tests/io_test.py"], + srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -562,7 +534,7 @@ py_test( py_test( name = "multioutput_test", size = "small", - srcs = ["python/learn/tests/multioutput_test.py"], + srcs = ["python/learn/estimators/multioutput_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -574,7 +546,7 @@ py_test( py_test( name = "nonlinear_test", size = "medium", - srcs = ["python/learn/tests/nonlinear_test.py"], + srcs = ["python/learn/estimators/nonlinear_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -586,7 +558,7 @@ py_test( py_test( name = "regression_test", size = "small", - srcs = ["python/learn/tests/regression_test.py"], + srcs = ["python/learn/estimators/regression_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -598,7 +570,7 @@ py_test( py_test( name = "ops_test", size = "small", - srcs = ["python/learn/ops/tests/ops_test.py"], + srcs = ["python/learn/ops/ops_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", 
@@ -610,7 +582,7 @@ py_test( py_test( name = "seq2seq_ops_test", size = "small", - srcs = ["python/learn/ops/tests/seq2seq_ops_test.py"], + srcs = ["python/learn/ops/seq2seq_ops_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -687,7 +659,7 @@ py_test( py_test( name = "stability_test", size = "small", - srcs = ["python/learn/tests/stability_test.py"], + srcs = ["python/learn/estimators/stability_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", diff --git a/tensorflow/contrib/learn/python/learn/datasets/BUILD b/tensorflow/contrib/learn/python/learn/datasets/BUILD index 43a4aa0d0d3..ec1fa815f84 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/BUILD +++ b/tensorflow/contrib/learn/python/learn/datasets/BUILD @@ -43,3 +43,15 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +py_test( + name = "load_csv_test", + size = "small", + srcs = ["load_csv_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/learn", + "//tensorflow/python:framework_test_lib", + ], +) diff --git a/tensorflow/contrib/learn/python/learn/tests/load_csv_test.py b/tensorflow/contrib/learn/python/learn/datasets/load_csv_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/load_csv_test.py rename to tensorflow/contrib/learn/python/learn/datasets/load_csv_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index a46f6ec364b..b2033add2f4 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -14,7 +14,6 @@ # ============================================================================== """Estimators.""" - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -32,6 +31,8 @@ from tensorflow.contrib.learn.python.learn.estimators.estimator import Estimator from tensorflow.contrib.learn.python.learn.estimators.estimator import infer_real_valued_columns_from_input from tensorflow.contrib.learn.python.learn.estimators.estimator import infer_real_valued_columns_from_input_fn from tensorflow.contrib.learn.python.learn.estimators.estimator import ModeKeys +from tensorflow.contrib.learn.python.learn.estimators.head import MetricKey +from tensorflow.contrib.learn.python.learn.estimators.head import PedictionKey from tensorflow.contrib.learn.python.learn.estimators.linear import LinearClassifier from tensorflow.contrib.learn.python.learn.estimators.linear import LinearRegressor from tensorflow.contrib.learn.python.learn.estimators.logistic_regressor import LogisticRegressor diff --git a/tensorflow/contrib/learn/python/learn/tests/base_test.py b/tensorflow/contrib/learn/python/learn/estimators/base_test.py similarity index 88% rename from tensorflow/contrib/learn/python/learn/tests/base_test.py rename to tensorflow/contrib/learn/python/learn/estimators/base_test.py index 7d6e193e7cc..ed486adacde 100644 --- a/tensorflow/contrib/learn/python/learn/tests/base_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/base_test.py @@ -43,7 +43,7 @@ class BaseTest(tf.test.TestCase): feature_columns = learn.infer_real_valued_columns_from_input(x) regressor = learn.LinearRegressor(feature_columns=feature_columns) regressor.fit(x, y, max_steps=100) - score = mean_squared_error(y, regressor.predict(x)) + score = mean_squared_error(y, np.array(list(regressor.predict(x)))) 
self.assertLess(score, 1.0, "Failed with score = {0}".format(score)) def testIris(self): @@ -52,7 +52,7 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris.data, [x for x in iris.target], max_steps=100) - score = accuracy_score(iris.target, classifier.predict(iris.data)) + score = accuracy_score(iris.target, list(classifier.predict(iris.data))) self.assertGreater(score, 0.7, "Failed with score = {0}".format(score)) def testIrisAllVariables(self): @@ -82,7 +82,7 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3, model_dir=output_dir) classifier.fit(iris.data, iris.target, max_steps=100) - score = accuracy_score(iris.target, classifier.predict(iris.data)) + score = accuracy_score(iris.target, list(classifier.predict(iris.data))) self.assertGreater(score, 0.5, "Failed with score = {0}".format(score)) # TODO(ipolosukhin): Check that summaries are correctly written. @@ -92,9 +92,9 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris.data, iris.target, steps=100) - score1 = accuracy_score(iris.target, classifier.predict(iris.data)) + score1 = accuracy_score(iris.target, list(classifier.predict(iris.data))) classifier.fit(iris.data, iris.target, steps=500) - score2 = accuracy_score(iris.target, classifier.predict(iris.data)) + score2 = accuracy_score(iris.target, list(classifier.predict(iris.data))) self.assertGreater( score2, score1, "Failed with score2 {0} <= score1 {1}".format(score2, score1)) @@ -120,9 +120,10 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris_data(), iris_target(), max_steps=500) - score1 = accuracy_score(iris.target, classifier.predict(iris.data)) + score1 = accuracy_score(iris.target, + list(classifier.predict(iris.data))) score2 = accuracy_score(iris.target, - classifier.predict(iris_predict_data())) + list(classifier.predict(iris_predict_data()))) self.assertGreater(score1, 0.5, "Failed with score = {0}".format(score1)) self.assertEqual(score2, score1, "Scores from {0} iterator doesn't " "match score {1} from full " @@ -137,7 +138,7 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris.data, iris.target, max_steps=250) - score = log_loss(iris.target, classifier.predict_proba(iris.data)) + score = log_loss(iris.target, list(classifier.predict_proba(iris.data))) self.assertLess(score, 0.8, "Failed with score = {0}".format(score)) def testBoston(self): @@ -146,7 +147,8 @@ class BaseTest(tf.test.TestCase): regressor = learn.LinearRegressor( feature_columns=learn.infer_real_valued_columns_from_input(boston.data)) regressor.fit(boston.data, boston.target, max_steps=500) - score = mean_squared_error(boston.target, regressor.predict(boston.data)) + score = mean_squared_error( + boston.target, np.array(list(regressor.predict(boston.data)))) self.assertLess(score, 150, "Failed with score = {0}".format(score)) def testUnfitted(self): diff --git a/tensorflow/contrib/learn/python/learn/estimators/classifier.py b/tensorflow/contrib/learn/python/learn/estimators/classifier.py index ac9ec45f89b..978ab9339b9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/classifier.py +++ b/tensorflow/contrib/learn/python/learn/estimators/classifier.py @@ -126,7 +126,7 @@ 
class Classifier(estimator.Estimator): @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False): + def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns predicted classes for given features. Args: @@ -160,7 +160,7 @@ class Classifier(estimator.Estimator): estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) def predict_proba( - self, x=None, input_fn=None, batch_size=None, as_iterable=False): + self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns predicted probabilty distributions for given features. Args: diff --git a/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py b/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py index a2883c39d37..353f25d1c2a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py @@ -32,9 +32,8 @@ from tensorflow.contrib.session_bundle import manifest_pb2 def iris_input_fn(num_epochs=None): iris = tf.contrib.learn.datasets.load_iris() - features = tf.reshape(tf.constant(iris.data), [-1, 4]) - if num_epochs: - features = tf.train.limit_epochs(features, num_epochs=num_epochs) + features = tf.train.limit_epochs( + tf.reshape(tf.constant(iris.data), [-1, 4]), num_epochs=num_epochs) target = tf.reshape(tf.constant(iris.target), [-1]) return features, target @@ -71,42 +70,22 @@ class ClassifierTest(tf.test.TestCase): params={'learning_rate': 0.01}) self._runIrisAll(est) - def testIrisPredictAsIterable(self): - iris = tf.contrib.learn.datasets.load_iris() - est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) - est.fit(iris.data, iris.target, steps=100) - scores = est.evaluate(x=iris.data, y=iris.target, name='eval') - predictions = list(est.predict(x=iris.data, as_iterable=True)) - predictions_proba = list(est.predict_proba(x=iris.data, as_iterable=True)) - self.assertEqual(len(predictions), iris.target.shape[0]) - self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1)) - other_score = _sklearn.accuracy_score(iris.target, predictions) - self.assertAllClose(other_score, scores['accuracy']) - def testIrisInputFn(self): - iris = tf.contrib.learn.datasets.load_iris() - est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) - est.fit(input_fn=iris_input_fn, steps=100) - est.evaluate(input_fn=iris_input_fn, steps=1, name='eval') - predictions = est.predict(input_fn=iris_input_fn) - self.assertEqual(predictions.shape[0], iris.target.shape[0]) - - def testIrisPredictInputFnAsIterable(self): iris = tf.contrib.learn.datasets.load_iris() est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) est.fit(input_fn=iris_input_fn, steps=100) est.evaluate(input_fn=iris_input_fn, steps=1, name='eval') predict_input_fn = functools.partial(iris_input_fn, num_epochs=1) - predictions = list(est.predict(input_fn=predict_input_fn, as_iterable=True)) + predictions = list(est.predict(input_fn=predict_input_fn)) self.assertEqual(len(predictions), iris.target.shape[0]) def _runIrisAll(self, est): iris = tf.contrib.learn.datasets.load_iris() est.fit(iris.data, iris.target, steps=100) scores = est.evaluate(x=iris.data, y=iris.target, name='eval') - predictions = est.predict(x=iris.data) - predictions_proba = est.predict_proba(x=iris.data) - self.assertEqual(predictions.shape[0], 
iris.target.shape[0]) + predictions = list(est.predict(x=iris.data)) + predictions_proba = list(est.predict_proba(x=iris.data)) + self.assertEqual(len(predictions), iris.target.shape[0]) self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1)) other_score = _sklearn.accuracy_score(iris.target, predictions) self.assertAllClose(other_score, scores['accuracy']) diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py index ae8fb6944d8..88cafc655fc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py @@ -23,11 +23,11 @@ import tempfile import tensorflow as tf -from tensorflow.contrib import layers from tensorflow.contrib import metrics as metrics_lib from tensorflow.contrib.framework.python.ops import variables as contrib_variables from tensorflow.contrib.learn.python.learn.estimators import composable_model from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.python.framework import ops from tensorflow.python.ops import state_ops @@ -42,10 +42,10 @@ def _iris_input_fn(): class _BaseEstimatorForTest(estimator.BaseEstimator): def __init__(self, - target_column, + head, feature_columns): super(_BaseEstimatorForTest, self).__init__(model_dir=tempfile.mkdtemp()) - self._target_column = target_column + self._head = head self._feature_columns = feature_columns def _get_train_ops(self, features, targets): @@ -54,18 +54,22 @@ class _BaseEstimatorForTest(estimator.BaseEstimator): logits = self._model.build_model( features, self._feature_columns, is_training=True) - loss = self._target_column.loss(logits, targets, features) - train_step = self._model.get_train_step(loss) + model_fn_ops = self._head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + _noop_training_fn, logits=logits) + train_step = self._model.get_train_step(model_fn_ops.loss) with ops.control_dependencies(train_step): with ops.get_default_graph().colocate_with(global_step): - return state_ops.assign_add(global_step, 1).op, loss + return state_ops.assign_add(global_step, 1).op, model_fn_ops.loss def _get_eval_ops(self, features, targets, metrics=None): logits = self._model.build_model( features, self._feature_columns, is_training=False) - loss = self._target_column.loss(logits, targets, features) - return {'loss': metrics_lib.streaming_mean(loss)} + model_fn_ops = self._head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + _noop_training_fn, logits=logits) + return {'loss': metrics_lib.streaming_mean(model_fn_ops.loss)} def _get_predict_ops(self, features): raise NotImplementedError @@ -74,32 +78,32 @@ class _BaseEstimatorForTest(estimator.BaseEstimator): class LinearEstimator(_BaseEstimatorForTest): def __init__(self, - target_column, + head, feature_columns): - super(LinearEstimator, self).__init__(target_column, feature_columns) + super(LinearEstimator, self).__init__(head, feature_columns) self._model = composable_model.LinearComposableModel( - num_label_columns=target_column.num_label_columns) + num_label_columns=head.logits_dimension) class JointLinearEstimator(_BaseEstimatorForTest): def __init__(self, - target_column, + head, feature_columns): - super(JointLinearEstimator, self).__init__(target_column, feature_columns) + super(JointLinearEstimator, 
self).__init__(head, feature_columns) self._model = composable_model.LinearComposableModel( - num_label_columns=target_column.num_label_columns, _joint_weights=True) + num_label_columns=head.logits_dimension, _joint_weights=True) class DNNEstimator(_BaseEstimatorForTest): def __init__(self, - target_column, + head, feature_columns, hidden_units): - super(DNNEstimator, self).__init__(target_column, feature_columns) + super(DNNEstimator, self).__init__(head, feature_columns) self._model = composable_model.DNNComposableModel( - num_label_columns=target_column.num_label_columns, + num_label_columns=head.logits_dimension, hidden_units=hidden_units) @@ -119,8 +123,8 @@ class ComposableModelTest(tf.test.TestCase): language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100) age = tf.contrib.layers.real_valued_column('age') - target_column = layers.multi_class_target(n_classes=2) - classifier = LinearEstimator(target_column, + head = head_lib._multi_class_head(n_classes=2) + classifier = LinearEstimator(head, feature_columns=[age, language]) classifier.fit(input_fn=input_fn, steps=1000) @@ -144,8 +148,8 @@ class ComposableModelTest(tf.test.TestCase): language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100) age = tf.contrib.layers.sparse_column_with_hash_bucket('age', 2) - target_column = layers.multi_class_target(n_classes=2) - classifier = JointLinearEstimator(target_column, + head = head_lib._multi_class_head(n_classes=2) + classifier = JointLinearEstimator(head, feature_columns=[age, language]) classifier.fit(input_fn=input_fn, steps=1000) @@ -160,8 +164,8 @@ class ComposableModelTest(tf.test.TestCase): cont_features = [ tf.contrib.layers.real_valued_column('feature', dimension=4)] - target_column = layers.multi_class_target(n_classes=3) - classifier = DNNEstimator(target_column, + head = head_lib._multi_class_head(n_classes=3) + classifier = DNNEstimator(head, feature_columns=cont_features, hidden_units=[3, 3]) @@ -169,5 +173,9 @@ class ComposableModelTest(tf.test.TestCase): classifier.evaluate(input_fn=_iris_input_fn, steps=100) +def _noop_training_fn(unused_loss): + return tf.no_op() + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index 5e0af3a5d3f..8cc46857615 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -267,16 +267,24 @@ def _dnn_classifier_model_fn(features, targets, mode, params): if mode == estimator.ModeKeys.TRAIN: targets = _reshape_targets(targets) - loss = loss_fn(logits, targets, - weight=_get_weight_tensor(features, weight_column_name)) + weight = _get_weight_tensor(features, weight_column_name) + training_loss = loss_fn(logits, targets, weight=weight) + loss = _rescale_eval_loss(training_loss, weight) train_ops = [optimizers.optimize_loss( - loss=loss, global_step=contrib_variables.get_global_step(), - learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), - clip_gradients=gradient_clip_norm, name=parent_scope)] + loss=training_loss, + global_step=contrib_variables.get_global_step(), + learning_rate=_LEARNING_RATE, + optimizer=_get_optimizer(optimizer), + clip_gradients=gradient_clip_norm, + name=parent_scope, + # Empty summaries to prevent optimizers from logging the training_loss. 
+ summaries=[])] if enable_centered_bias: train_ops.append(_centered_bias_step(targets, loss_fn, num_label_columns)) + logging_ops.scalar_summary("loss", loss) + return None, loss, control_flow_ops.group(*train_ops) elif mode == estimator.ModeKeys.EVAL: diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py index e61511134f7..1b40681442c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import numpy as np +import six from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated @@ -28,15 +29,12 @@ from tensorflow.contrib.framework.python.ops import variables as contrib_variabl from tensorflow.contrib.layers.python.layers import feature_column_ops from tensorflow.contrib.learn.python.learn.estimators import composable_model from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import logging_ops from tensorflow.python.ops import nn from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import training def _changing_default_center_bias(): @@ -67,7 +65,7 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): """ def __init__(self, # _joint_linear_weights pylint: disable=invalid-name - target_column, + head, model_dir=None, linear_feature_columns=None, linear_optimizer=None, @@ -78,13 +76,13 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): dnn_activation_fn=nn.relu, dnn_dropout=None, gradient_clip_norm=None, - enable_centered_bias=True, config=None, - feature_engineering_fn=None): + feature_engineering_fn=None, + default_prediction_key=None): """Initializes a _DNNLinearCombinedBaseEstimator instance. Args: - target_column: A _TargetColumn object. + head: A _Head object. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. @@ -111,14 +109,12 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. See tf.clip_by_global_norm for more details. - enable_centered_bias: A bool. If True, estimator will learn a centered - bias variable for each class. Rest of the model structure learns the - residual after centered bias. config: RunConfig object to configure the runtime settings. feature_engineering_fn: Feature engineering function. Takes features and targets which are the output of `input_fn` and returns features and targets which will be fed into the model. + default_prediction_key: Default prediction key to use with metrics. 
Raises: ValueError: If both linear_feature_columns and dnn_features_columns are @@ -130,14 +126,14 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): num_ps_replicas = config.num_ps_replicas if config else 0 self._linear_model = composable_model.LinearComposableModel( - num_label_columns=target_column.num_label_columns, + num_label_columns=head.logits_dimension, optimizer=linear_optimizer, _joint_weights=_joint_linear_weights, gradient_clip_norm=gradient_clip_norm, num_ps_replicas=num_ps_replicas) self._dnn_model = composable_model.DNNComposableModel( - num_label_columns=target_column.num_label_columns, + num_label_columns=head.logits_dimension, hidden_units=dnn_hidden_units, optimizer=dnn_optimizer, activation_fn=dnn_activation_fn, @@ -149,9 +145,8 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): self._linear_optimizer = linear_optimizer self._dnn_feature_columns = dnn_feature_columns self._dnn_hidden_units = dnn_hidden_units - self._centered_bias_weight_collection = "centered_bias" - self._enable_centered_bias = enable_centered_bias - self._target_column = target_column + self._head = head + self._default_prediction_key = default_prediction_key self._feature_engineering_fn = ( feature_engineering_fn or (lambda features, targets: (features, targets))) @@ -194,9 +189,12 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): return (self._dnn_model.get_bias(model_dir=self._model_dir) + [self.get_variable_value("centered_bias_weight")]) - def _get_target_column(self): - """Returns the target column of this Estimator.""" - return self._target_column + # TODO(zakaria): Remove this function once export. export_estimator is + # obsolete. + def _create_signature_fn(self): + """Returns a function to create export signature of this Estimator.""" + # pylint: disable=protected-access + return self._head._create_signature_fn() def _get_feature_dict(self, features): if isinstance(features, dict): @@ -205,45 +203,60 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): def _get_train_ops(self, features, targets): """See base class.""" - global_step = contrib_variables.get_global_step() - assert global_step features = self._get_feature_dict(features) features, targets = self._feature_engineering_fn(features, targets) logits = self._logits(features, is_training=True) - if self._enable_centered_bias: - centered_bias_step = [self._centered_bias_step(targets, features)] - else: - centered_bias_step = [] - with ops.control_dependencies(centered_bias_step): - training_loss = self._target_column.training_loss(logits, targets, - features) - weighted_average_loss = self._target_column.loss(logits, targets, - features) - logging_ops.scalar_summary("loss", weighted_average_loss) + def _make_training_op(training_loss): + global_step = contrib_variables.get_global_step() + assert global_step - linear_train_step = self._linear_model.get_train_step(training_loss) - dnn_train_step = (self._dnn_model.get_train_step(training_loss) if - self._dnn_model else []) + linear_train_step = self._linear_model.get_train_step(training_loss) + dnn_train_step = (self._dnn_model.get_train_step(training_loss) if + self._dnn_model else []) + with ops.control_dependencies(linear_train_step + dnn_train_step): + with ops.get_default_graph().colocate_with(global_step): + return state_ops.assign_add(global_step, 1).op - with ops.control_dependencies(linear_train_step + dnn_train_step): - with ops.get_default_graph().colocate_with(global_step): - return 
state_ops.assign_add(global_step, 1).op, weighted_average_loss + model_fn_ops = self._head.head_ops(features, targets, + estimator.ModeKeys.TRAIN, + _make_training_op, + logits=logits) + return model_fn_ops.training_op, model_fn_ops.loss def _get_eval_ops(self, features, targets, metrics=None): """See base class.""" features = self._get_feature_dict(features) features, targets = self._feature_engineering_fn(features, targets) logits = self._logits(features) - return self._target_column.get_eval_ops(features, logits, targets, metrics) + + model_fn_ops = self._head.head_ops(features, targets, + estimator.ModeKeys.EVAL, None, + logits=logits) + all_metrics = model_fn_ops.default_metrics + if metrics: + for name, metric in six.iteritems(metrics): + if not isinstance(name, tuple): + # TODO(zakaria): remove once deprecation is finished (b/31229024) + all_metrics[(name, self._default_prediction_key)] = metric + else: + all_metrics[name] = metric + # TODO(zakaria): Remove this once we refactor this class to delegate + # to estimator. + # pylint: disable=protected-access + result = estimator._make_metrics_ops(all_metrics, features, targets, + model_fn_ops.predictions) + return result def _get_predict_ops(self, features): """See base class.""" features = self._get_feature_dict(features) features, _ = self._feature_engineering_fn(features, None) logits = self._logits(features) - return self._target_column.logits_to_predictions(logits, proba=True) + model_fn_ops = self._head.head_ops(features, None, estimator.ModeKeys.INFER, + None, logits=logits) + return model_fn_ops.predictions @deprecated( "2016-09-23", @@ -278,32 +291,6 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): return self._linear_model.build_model( features, self._linear_feature_columns, is_training) - def _centered_bias(self): - centered_bias = variables.Variable( - array_ops.zeros([self._target_column.num_label_columns]), - collections=[self._centered_bias_weight_collection, - ops.GraphKeys.VARIABLES], - name="centered_bias_weight") - logging_ops.scalar_summary( - ["centered_bias_%d" % cb for cb in range( - self._target_column.num_label_columns)], - array_ops.reshape(centered_bias, [-1])) - return centered_bias - - def _centered_bias_step(self, targets, features): - centered_bias = ops.get_collection(self._centered_bias_weight_collection) - batch_size = array_ops.shape(targets)[0] - logits = array_ops.reshape( - array_ops.tile(centered_bias[0], [batch_size]), - [batch_size, self._target_column.num_label_columns]) - with ops.name_scope(None, "centered_bias", (targets, features)): - training_loss = self._target_column.training_loss( - logits, targets, features) - # Learn central bias by an optimizer. 0.1 is a convervative lr for a - # single variable. 
- return training.AdagradOptimizer(0.1).minimize( - training_loss, var_list=centered_bias) - def _logits(self, features, is_training=False): linear_feature_columns = self._get_linear_feature_columns() dnn_feature_columns = self._get_dnn_feature_columns() @@ -319,10 +306,7 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): else: logits = self._linear_logits(features, is_training) - if self._enable_centered_bias: - return nn.bias_add(logits, self._centered_bias()) - else: - return logits + return logits class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): @@ -448,10 +432,11 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): if enable_centered_bias is None: enable_centered_bias = True _changing_default_center_bias() - - target_column = layers.multi_class_target( + # pylint: disable=protected-access + head = head_lib._multi_class_head( n_classes=n_classes, - weight_column_name=weight_column_name) + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias) super(DNNLinearCombinedClassifier, self).__init__( model_dir=model_dir, linear_feature_columns=linear_feature_columns, @@ -463,15 +448,15 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, gradient_clip_norm=gradient_clip_norm, - enable_centered_bias=enable_centered_bias, - target_column=target_column, + head=head, config=config, - feature_engineering_fn=feature_engineering_fn) + feature_engineering_fn=feature_engineering_fn, + default_prediction_key=head_lib.PedictionKey.CLASSES) @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False): + def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns predicted classes for given features. Args: @@ -498,7 +483,7 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) def predict_proba( - self, x=None, input_fn=None, batch_size=None, as_iterable=False): + self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns prediction probabilities for given features. Args: @@ -517,6 +502,11 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): return super(DNNLinearCombinedClassifier, self).predict( x=x, input_fn=input_fn, batch_size=batch_size, as_iterable=as_iterable) + def _get_predict_ops(self, features): + """See base class.""" + return super(DNNLinearCombinedClassifier, self)._get_predict_ops(features)[ + head_lib.PedictionKey.PROBABILITIES] + class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator): """A regressor for TensorFlow Linear and DNN joined training models. 
@@ -642,9 +632,11 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator): if enable_centered_bias is None: enable_centered_bias = True _changing_default_center_bias() - target_column = layers.regression_target( + # pylint: disable=protected-access + head = head_lib._regression_head( weight_column_name=weight_column_name, - target_dimension=target_dimension) + target_dimension=target_dimension, + enable_centered_bias=enable_centered_bias) super(DNNLinearCombinedRegressor, self).__init__( model_dir=model_dir, linear_feature_columns=linear_feature_columns, @@ -656,7 +648,14 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator): dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, gradient_clip_norm=gradient_clip_norm, - enable_centered_bias=enable_centered_bias, - target_column=target_column, + head=head, config=config, - feature_engineering_fn=feature_engineering_fn) + feature_engineering_fn=feature_engineering_fn, + default_prediction_key=head_lib.PedictionKey.SCORES) + + def _get_predict_ops(self, features): + """See base class.""" + return super(DNNLinearCombinedRegressor, self)._get_predict_ops(features)[ + head_lib.PedictionKey.SCORES] + + diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py index 452973f7528..fa28eb6a3e4 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py @@ -254,7 +254,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')], dnn_hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn_train, steps=100) scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) # Weighted cross entropy = (-7*log(0.25)-3*log(0.75))/10 = 1.06 @@ -289,7 +288,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')], dnn_hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn_train, steps=100) scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) # The model should learn (y = x) because of the weights, so the accuracy @@ -371,7 +369,7 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): def _input_fn_train(): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)} return features, target def _input_fn_predict(): @@ -387,30 +385,26 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): classifier.fit(input_fn=_input_fn_train, steps=100) - probs = classifier.predict_proba(input_fn=_input_fn_predict) + probs = list(classifier.predict_proba(input_fn=_input_fn_predict)) self.assertAllClose([[0.75, 0.25]] * 4, probs, 0.05) - classes = classifier.predict(input_fn=_input_fn_predict) - self.assertListEqual([0] * 4, list(classes)) - - probs = classifier.predict_proba( - input_fn=_input_fn_predict, as_iterable=True) - self.assertAllClose([[0.75, 0.25]] * 4, list(probs), 0.05) - classes = classifier.predict( - input_fn=_input_fn_predict, as_iterable=True) - self.assertListEqual([0] * 4, list(classes)) + classes = list(classifier.predict(input_fn=_input_fn_predict)) + self.assertListEqual([0] * 4, 
classes) def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = { + 'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): # For the case of binary classification, the 2nd column of "predictions" # denotes the model predictions. + targets = tf.to_float(targets) predictions = tf.slice(predictions, [0, 1], [-1, 1]) return tf.reduce_sum(tf.mul(predictions, targets)) @@ -419,9 +413,9 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')], dnn_hidden_units=[3, 3]) - classifier.fit(input_fn=_input_fn_train, steps=100) + classifier.fit(input_fn=_input_fn, steps=100) scores = classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'my_accuracy': tf.contrib.metrics.streaming_accuracy, @@ -431,22 +425,24 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): self.assertTrue( set(['loss', 'my_accuracy', 'my_precision', 'my_metric' ]).issubset(set(scores.keys()))) - predictions = classifier.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array( + list(classifier.predict(input_fn=predict_input_fn))) self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions), scores['my_accuracy']) # Test the case where the 2nd element of the key is neither "classes" nor # "probabilities". - with self.assertRaises(ValueError): + with self.assertRaises(KeyError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={('bad_name', 'bad_type'): tf.contrib.metrics.streaming_auc}) # Test the case where the tuple of the key doesn't have 2 elements. 
with self.assertRaises(ValueError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ ('bad_length_name', 'classes', 'bad_length'): @@ -536,7 +532,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): self.assertNotIn('dnn/logits/weights', classifier.get_variable_names()) self.assertEquals(1, len(classifier.linear_bias_)) self.assertEquals(2, len(classifier.linear_weights_)) - print(classifier.linear_weights_) self.assertEquals(1, len(classifier.linear_weights_['linear/age/weight'])) self.assertEquals( 100, len(classifier.linear_weights_['linear/language/weights'])) @@ -810,10 +805,11 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1.], [0.], [0.], [0.]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = {'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): @@ -825,9 +821,9 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): dnn_hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - regressor.fit(input_fn=_input_fn_train, steps=100) + regressor.fit(input_fn=_input_fn, steps=100) scores = regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={ 'my_error': tf.contrib.metrics.streaming_mean_squared_error, @@ -836,25 +832,27 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): self.assertIn('loss', set(scores.keys())) self.assertIn('my_error', set(scores.keys())) self.assertIn('my_metric', set(scores.keys())) - predictions = regressor.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(regressor.predict(input_fn=predict_input_fn))) self.assertAlmostEqual( _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), scores['my_error']) # Tests that when the key is a tuple, an error is raised. 
- with self.assertRaises(TypeError): + with self.assertRaises(KeyError): regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={('my_error', 'predictions' ): tf.contrib.metrics.streaming_mean_squared_error}) def testTrainSaveLoad(self): """Tests regression with restarting training / evaluate.""" - def _input_fn(): + def _input_fn(num_epochs=None): # Create 4 rows of (y = x) target = tf.constant([[100.], [3.], [2.], [2.]]) - features = {'x': tf.constant([[100.], [3.], [2.], [2.]])} + features = {'x': tf.train.limit_epochs( + tf.constant([[100.], [3.], [2.], [2.]]), num_epochs=num_epochs)} return features, target model_dir = tempfile.mkdtemp() @@ -866,13 +864,14 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): model_dir=model_dir, config=tf.contrib.learn.RunConfig(tf_random_seed=1)) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) classifier = new_estimator() classifier.fit(input_fn=_input_fn, steps=100) - predictions = classifier.predict(input_fn=_input_fn) + predictions = list(classifier.predict(input_fn=predict_input_fn)) del classifier classifier = new_estimator() - predictions2 = classifier.predict(input_fn=_input_fn) + predictions2 = list(classifier.predict(input_fn=predict_input_fn)) self.assertAllClose(predictions, predictions2) def testTrainWithPartitionedVariables(self): diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier.py deleted file mode 100644 index e668e71db88..00000000000 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier.py +++ /dev/null @@ -1,568 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== - -"""Deep Neural Network estimator for large multi-class multi-label problems. - -The Training is sped up using Candidate Sampling. Evaluation and Inference -uses full softmax. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tempfile - -from tensorflow.contrib import framework as contrib_framework -from tensorflow.contrib import layers -from tensorflow.contrib.framework.python.ops import variables -from tensorflow.contrib.layers.python.layers import initializers -from tensorflow.contrib.layers.python.layers import optimizers -from tensorflow.contrib.learn.python.learn import evaluable -from tensorflow.contrib.learn.python.learn import metric_spec -from tensorflow.contrib.learn.python.learn import trainable -from tensorflow.contrib.learn.python.learn.estimators import estimator -from tensorflow.contrib.metrics.python.ops import metric_ops -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import partitioned_variables -from tensorflow.python.ops import standard_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import training as train - - -_CLASSES = "classes" -_TOP_K = "top_k" -_PROBABILITIES = "probabilities" -_DEFAULT_LEARNING_RATE = 0.01 - - -def _as_iterable(preds, output): - for pred in preds: - yield pred[output] - - -def _get_optimizer(optimizer): - if callable(optimizer): - return optimizer() - else: - return optimizer - - -def _get_default_optimizer(): - """Default optimizer for DNN models.""" - return train.AdagradOptimizer(_DEFAULT_LEARNING_RATE) - - -def _get_feature_dict(features): - if isinstance(features, dict): - return features - return {"": features} - - -def _dnn_sampled_softmax_classifier_model_fn(features, targets, mode, params): - """model_fn that uses candidate sampling. - - Args: - features: Single Tensor or dict of Tensor (depends on data passed to `fit`) - targets: A single Tensor of shape [batch_size, n_labels] containing - the target indices. - mode: Represents if this training, evaluation or prediction. See `ModeKeys`. - params: A dict of hyperparameters that are listed below. - hidden_units- List of hidden units per layer. All layers are fully - connected. Ex. `[64, 32]` means first layer has 64 nodes and second one - has 32. - feature_columns- An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `FeatureColumn`. - n_classes- number of target classes. It must be greater than 2. - n_samples- number of sample target classes. Needs to be tuned - A good - starting point could be 2% of n_classes. - n_labels- number of labels in each example. - top_k- The number of classes to predict. - optimizer- An instance of `tf.Optimizer` used to train the model. If - `None`, will use an Adagrad optimizer. - dropout- When not `None`, the probability we will drop out a given - coordinate. - gradient_clip_norm- A float > 0. If provided, gradients are - clipped to their global norm with this clipping ratio. See - tf.clip_by_global_norm for more details. - num_ps_replicas- The number of parameter server replicas. - - Returns: - predictions: A single Tensor or a dict of Tensors. - loss: A scalar containing the loss of the step. - train_op: The op for training. 
- """ - - hidden_units = params["hidden_units"] - feature_columns = params["feature_columns"] - n_classes = params["n_classes"] - n_samples = params["n_samples"] - n_labels = params["n_labels"] - top_k = params["top_k"] - optimizer = params["optimizer"] - dropout = params["dropout"] - gradient_clip_norm = params["gradient_clip_norm"] - num_ps_replicas = params["num_ps_replicas"] - - parent_scope = "dnn_ss" - - features = _get_feature_dict(features) - targets = _reshape_targets(targets) - - # Setup the input layer partitioner. - input_layer_partitioner = ( - partitioned_variables.min_max_variable_partitioner( - max_partitions=num_ps_replicas, - min_slice_size=64 << 20)) - - # Create the input layer. - with variable_scope.variable_scope( - parent_scope + "/input_from_feature_columns", - features.values(), - partitioner=input_layer_partitioner) as scope: - net = layers.input_from_feature_columns( - features, - feature_columns, - weight_collections=[parent_scope], - scope=scope) - - # Setup the hidden layer partitioner. - hidden_layer_partitioner = ( - partitioned_variables.min_max_variable_partitioner( - max_partitions=num_ps_replicas)) - - final_hidden_layer_dim = None - # Create hidden layers using fully_connected. - for layer_id, num_hidden_units in enumerate(hidden_units): - with variable_scope.variable_scope( - parent_scope + "/hiddenlayer_%d" % layer_id, [net], - partitioner=hidden_layer_partitioner) as scope: - net = layers.fully_connected(net, - num_hidden_units, - variables_collections=[parent_scope], - scope=scope) - final_hidden_layer_dim = num_hidden_units - # Add dropout if it is enabled. - if dropout is not None and mode == estimator.ModeKeys.TRAIN: - net = layers.dropout(net, keep_prob=(1.0 - dropout)) - - # Create the weights and biases for the logit layer. - with variable_scope.variable_scope( - parent_scope + "/logits", [net], - partitioner=hidden_layer_partitioner) as scope: - dtype = net.dtype.base_dtype - weights_shape = [n_classes, final_hidden_layer_dim] - weights = variables.model_variable( - "weights", - shape=weights_shape, - dtype=dtype, - initializer=initializers.xavier_initializer(), - trainable=True, - collections=[parent_scope]) - biases = variables.model_variable( - "biases", - shape=[n_classes,], - dtype=dtype, - initializer=init_ops.zeros_initializer, - trainable=True, - collections=[parent_scope]) - - if mode == estimator.ModeKeys.TRAIN: - # Call the candidate sampling APIs and calculate the loss. 
- sampled_values = nn.learned_unigram_candidate_sampler( - true_classes=math_ops.to_int64(targets), - num_true=n_labels, - num_sampled=n_samples, - unique=True, - range_max=n_classes) - - sampled_softmax_loss = nn.sampled_softmax_loss( - weights=weights, - biases=biases, - inputs=net, - labels=math_ops.to_int64(targets), - num_sampled=n_samples, - num_classes=n_classes, - num_true=n_labels, - sampled_values=sampled_values) - - loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss") - - train_op = optimizers.optimize_loss( - loss=loss, global_step=contrib_framework.get_global_step(), - learning_rate=_DEFAULT_LEARNING_RATE, - optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, - name=parent_scope) - return None, loss, train_op - - elif mode == estimator.ModeKeys.EVAL: - logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), - biases) - predictions = {} - predictions[_PROBABILITIES] = nn.softmax(logits) - predictions[_CLASSES] = math_ops.argmax(logits, 1) - _, predictions[_TOP_K] = nn.top_k(logits, top_k) - - # Since the targets have multiple labels, setup the target probabilities - # as 1.0/n_labels for each of the labels. - target_one_hot = array_ops.one_hot( - indices=targets, depth=n_classes, on_value=1.0 / n_labels) - target_one_hot = math_ops.reduce_sum( - input_tensor=target_one_hot, - reduction_indices=[1]) - - loss = math_ops.reduce_mean( - nn.softmax_cross_entropy_with_logits(logits, target_one_hot)) - - return predictions, loss, None - - elif mode == estimator.ModeKeys.INFER: - logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), - biases) - predictions = {} - predictions[_PROBABILITIES] = nn.softmax(logits) - predictions[_CLASSES] = math_ops.argmax(logits, 1) - _, predictions[_TOP_K] = nn.top_k(logits, top_k) - - return predictions, None, None - - -def _reshape_targets(targets): - if targets is None: - return None - check_shape_op = control_flow_ops.Assert( - math_ops.less_equal(array_ops.rank(targets), 2), - ["target's should be either [batch_size, n_labels] or [batch_size]"]) - with ops.control_dependencies([check_shape_op]): - targets = array_ops.reshape( - targets, shape=[array_ops.shape(targets)[0], -1]) - return targets - - -def _top_k_fn_wrapper(metric_fn, k): - - def wrap_func(predictions, labels): - return metric_fn(predictions, _reshape_targets(labels), k=k) - - wrap_func.__name__ = metric_fn.__name__ - return wrap_func - - -class _DNNSampledSoftmaxClassifier(trainable.Trainable, evaluable.Evaluable): - """A classifier for TensorFlow DNN models. - - Example: - - ```python - legos = sparse_column_with_hash_bucket(column_name="legos", - hash_bucket_size=1000) - watched_videos = sparse_column_with_hash_bucket( - column_name="watched_videos", - hash_bucket_size=20000) - - legos_emb = embedding_column(sparse_id_column=legos, dimension=16, - combiner="sum") - watched_videos_emb = embedding_column(sparse_id_column=watched_videos, - dimension=256, - combiner="sum") - - estimator = DNNSampledSoftmaxClassifier( - n_classes=500000, n_samples=10000, n_labels=5, - feature_columns=[legos_emb, watched_videos_emb], - hidden_units=[1024, 512, 256]) - - # Or estimator using the Adam optimizer with dropout. 
- estimator = DNNSampledSoftmaxClassifier( - feature_columns=[education_emb, occupation_emb], - hidden_units=[1024, 512, 256], - optimizer=tf.train.ProximalAdagradOptimizer( - learning_rate=0.1), - dropout=0.1) - - # Input builders - def input_fn_train: # returns x, Y - pass - estimator.fit(input_fn=input_fn_train) - - def input_fn_eval: # returns x, Y - pass - estimator.evaluate(input_fn=input_fn_eval) - estimator.predict(x=x) - ``` - - Input of `fit` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * for each `column` in `feature_columns`: - - if `column` is a `SparseColumn`, a feature with `key=column.name` - whose `value` is a `SparseTensor`. - - if `column` is a `EmbeddingColumn`, a feature with `key=column.name` - whose `value` is a `SparseTensor`. - - if `column` is a `WeightedSparseColumn`, two features: the first with - `key` the id column name, the second with `key` the weight column name. - Both features' `value` must be a `SparseTensor`. - - if `column` is a `RealValuedColumn`, a feature with `key=column.name` - whose `value` is a `Tensor`. - """ - - def __init__(self, - hidden_units, - feature_columns, - n_classes, - n_samples, - n_labels=1, - top_k=1, - model_dir=None, - optimizer=None, - dropout=None, - gradient_clip_norm=None, - config=None, - feature_engineering_fn=None): - """Initializes a DNNSampledSoftmaxClassifier instance. - - Args: - hidden_units: List of hidden units per layer. All layers are fully - connected. Ex. `[64, 32]` means first layer has 64 nodes and second one - has 32. - feature_columns: An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `FeatureColumn`. - n_classes: number of target classes. It must be greater than 2. - n_samples: number of sample target classes. Needs to be tuned - A good - starting point could be 2% of n_classes. - n_labels: number of labels in each example. - top_k: The number of classes to predict. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. - optimizer: An instance of `tf.Optimizer` used to train the model. If - `None`, will use an Adagrad optimizer. - dropout: When not `None`, the probability we will drop out a given - coordinate. - gradient_clip_norm: A float > 0. If provided, gradients are - clipped to their global norm with this clipping ratio. See - tf.clip_by_global_norm for more details. - config: `RunConfig` object to configure the runtime settings. - feature_engineering_fn: Feature engineering function. Takes features and - targets which are the output of `input_fn` and - returns features and targets which will be fed - into the model. - - Returns: - A `DNNSampledSoftmaxClassifier` estimator. - - Raises: - ValueError: If n_classes <= 2. - ValueError: If n_classes < n_samples. - ValueError: If n_classes < n_labels. - """ - # Validate all the inputs. - if n_classes <= 2: - raise ValueError("n_classes should be greater than 2. For n_classes <= 2," - " use DNNClassifier.") - if n_classes < n_samples: - raise ValueError("n_classes (%d) should be greater than n_samples (%d)." % - (n_classes, n_samples)) - if n_classes < n_labels: - raise ValueError("n_classes (%d) should be greater than n_labels" - " (%d)." 
% (n_classes, n_labels)) - - self._top_k = top_k - self._feature_columns = feature_columns - assert self._feature_columns - self._model_dir = model_dir or tempfile.mkdtemp() - - # Build the estimator with _dnn_sampled_softmax_classifier_model_fn. - self._estimator = estimator.Estimator( - model_fn=_dnn_sampled_softmax_classifier_model_fn, - model_dir=self._model_dir, - config=config, - params={ - "hidden_units": hidden_units, - "feature_columns": feature_columns, - "n_classes": n_classes, - "n_samples": n_samples, - "n_labels": n_labels, - "top_k": top_k, - "optimizer": optimizer or _get_default_optimizer(), - "dropout": dropout, - "gradient_clip_norm": gradient_clip_norm, - "num_ps_replicas": config.num_ps_replicas if config else 0 - }, - feature_engineering_fn=feature_engineering_fn) - - def get_estimator(self): - return self._estimator - - def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None, - monitors=None, max_steps=None): - """See trainable.Trainable.""" - return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, - batch_size=batch_size, monitors=monitors, - max_steps=max_steps) - - def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None, - batch_size=None, steps=None, metrics=None, name=None, - range_k=None): - # pylint: disable=g-doc-args,g-doc-return-or-yield - """See evaluable.Evaluable for a description of the Args. - - Calculates the following metrics by default: - loss - average_precision@top_k: see - https://en.wikipedia.org/wiki/Information_retrieval#Average_precision - for k in range_k: - precision@k and recall@k - - range_k: A list of numbers where precision and recall have to be obtained. - For eg. range_k=[1,5] will calculate precision@1, precision@5, - recall@1 and recall@5. If None, defaults to [1, top_k]. - """ - if not metrics: - metrics = {} - metrics.update({ - "average_precision_at_%d" % self._top_k: metric_spec.MetricSpec( - metric_fn=_top_k_fn_wrapper( - metric_ops.streaming_sparse_average_precision_at_k, - k=self._top_k), - prediction_key=_PROBABILITIES) - }) - if range_k is None: - if self._top_k > 1: - range_k = [1, self._top_k] - else: - range_k = [1] - for k in range_k: - metrics.update({ - "precision_at_%d" % k: metric_spec.MetricSpec( - metric_fn=_top_k_fn_wrapper( - metric_ops.streaming_sparse_precision_at_k, k=k), - prediction_key=_PROBABILITIES,) - }) - metrics.update({ - "recall_at_%d" % k: metric_spec.MetricSpec( - metric_fn=_top_k_fn_wrapper( - metric_ops.streaming_sparse_recall_at_k, k=k), - prediction_key=_PROBABILITIES,) - }) - - return self._estimator.evaluate(x=x, y=y, input_fn=input_fn, - feed_fn=feed_fn, batch_size=batch_size, - steps=steps, metrics=metrics, name=name) - - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False, - get_top_k=False): - """Returns predicted classes for given features. - - Args: - x: features. - input_fn: Input function. If set, x must be None. - batch_size: Override default batch size. - as_iterable: If True, return an iterable which keeps yielding predictions - for each example until inputs are exhausted. Note: The inputs must - terminate if you want the iterable to terminate (e.g. be sure to pass - num_epochs=1 if you are using something like read_batch_features). - get_top_k : if set to true returns the top k classes otherwise returns - the top class. - - Returns: - Numpy array of predicted classes (or an iterable of predicted classes if - as_iterable is True). 
- """ - if get_top_k: - key = _TOP_K - else: - key = _CLASSES - preds = self._estimator.predict(x=x, input_fn=input_fn, - batch_size=batch_size, outputs=[key], - as_iterable=as_iterable) - if as_iterable: - return _as_iterable(preds, output=key) - return preds[key] - - def predict_proba(self, x=None, input_fn=None, batch_size=None, - as_iterable=False): - """Returns prediction probabilities for given features. - - Args: - x: features. - input_fn: Input function. If set, x and y must be None. - batch_size: Override default batch size. - as_iterable: If True, return an iterable which keeps yielding predictions - for each example until inputs are exhausted. Note: The inputs must - terminate if you want the iterable to terminate (e.g. be sure to pass - num_epochs=1 if you are using something like read_batch_features). - - Returns: - Numpy array of predicted probabilities (or an iterable of predicted - probabilities if as_iterable is True). - """ - preds = self._estimator.predict(x=x, input_fn=input_fn, - batch_size=batch_size, - outputs=[_PROBABILITIES], - as_iterable=as_iterable) - if as_iterable: - return _as_iterable(preds, output=_PROBABILITIES) - return preds[_PROBABILITIES] - - def export(self, export_dir, signature_fn=None, - input_fn=None, default_batch_size=1, - exports_to_keep=None): - """Exports inference graph into given dir. - - Args: - export_dir: A string containing a directory to write the exported graph - and checkpoints. - signature_fn: Function that returns a default signature and a named - signature map, given `Tensor` of `Example` strings, `dict` of `Tensor`s - for features and `Tensor` or `dict` of `Tensor`s for predictions. - input_fn: If `use_deprecated_input_fn` is true, then a function that given - `Tensor` of `Example` strings, parses it into features that are then - passed to the model. Otherwise, a function that takes no argument and - returns a tuple of (features, targets), where features is a dict of - string key to `Tensor` and targets is a `Tensor` that's currently not - used (and so can be `None`). - default_batch_size: Default batch size of the `Example` placeholder. - exports_to_keep: Number of exports to keep. - - Returns: - The string path to the exported directory. NB: this functionality was - added ca. 2016/09/25; clients that depend on the return value may need - to handle the case where this function returns None because subclasses - are not returning a value. - """ - def default_input_fn(unused_estimator, examples): - return layers.parse_feature_columns_from_examples( - examples, self._feature_columns) - return self._estimator.export(export_dir=export_dir, - signature_fn=signature_fn, - input_fn=input_fn or default_input_fn, - default_batch_size=default_batch_size, - exports_to_keep=exports_to_keep) - - def get_variable_names(self): - return self._estimator.get_variable_names() - - @property - def model_dir(self): - return self._model_dir diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier_test.py deleted file mode 100644 index 3a58479aff8..00000000000 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier_test.py +++ /dev/null @@ -1,459 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Tests for DNNSampledSoftmaxClassifier estimator.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import tempfile - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.learn.python.learn.estimators import dnn_sampled_softmax_classifier -from tensorflow.python.ops import math_ops - - -class DNNSampledSoftmaxClassifierTest(tf.test.TestCase): - - def testMultiClass(self): - """Tests the following. - - 1. Tests fit() and evaluate() calls. - 2. Tests the use of a non default optimizer. - 3. Tests the output of get_variable_names(). - Note that the training output is not verified because it is flaky with the - Iris dataset. - """ - def _iris_input_fn(): - iris = tf.contrib.learn.datasets.load_iris() - return { - 'feature': tf.constant(iris.data, dtype=tf.float32) - }, tf.constant(iris.target, shape=[150, 1], dtype=tf.int64) - - cont_features = [ - tf.contrib.layers.real_valued_column('feature', dimension=4)] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - classifier.fit(input_fn=_iris_input_fn, steps=5) - classifier.evaluate(input_fn=_iris_input_fn, steps=1) - var_names = classifier.get_variable_names() - self.assertGreater(len(var_names), 6) - - def testNonDictFeatures(self): - """Tests non-dictionary features runs without error.""" - - def _iris_input_fn(): - iris = tf.contrib.learn.datasets.load_iris() - return (tf.constant( - iris.data, dtype=tf.float32), tf.constant( - iris.target, shape=[150, 1], dtype=tf.int64)) - - cont_features = [tf.contrib.layers.real_valued_column('', dimension=4)] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - classifier.fit(input_fn=_iris_input_fn, steps=5) - classifier.evaluate(input_fn=_iris_input_fn, steps=1) - - def testOneDimensionTargets(self): - """Tests one dimensional targets runs without error.""" - - def _input_fn(): - return { - 'feature': tf.constant( - [1, 1, 1], dtype=tf.float32) - }, tf.constant( - [3, 5, 7], dtype=tf.int64) - - cont_features = [ - tf.contrib.layers.real_valued_column( - 'feature', dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=10, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - classifier.fit(input_fn=_input_fn, steps=5) - classifier.evaluate(input_fn=_input_fn, steps=1) - - def testWrongDimensionTargets(self): - """Tests one dimensional targets runs without error.""" - - def _input_fn(): - return { - 'feature': tf.constant( - [1, 1, 1], dtype=tf.float32) - }, tf.constant( - [[[3, 5, 7]]], dtype=tf.int64) - - cont_features = [ - tf.contrib.layers.real_valued_column( - 'feature', dimension=1) - ] - - classifier = 
dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=10, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, 'target'): - classifier.fit(input_fn=_input_fn, steps=5) - - def testTrainWithPartitionedVariables(self): - """Tests the following. - - 1. Tests training with partitioned variables. - 2. Test that the model actually trains. - 3. Tests the output of evaluate() and predict(). - """ - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - # The given hash_bucket_size results in variables larger than the - # default min_slice_size attribute, so the variables are partitioned. - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=2e7) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - # Because we did not start a distributed cluster, we need to pass an - # empty ClusterSpec, otherwise the device_setter will look for - # distributed jobs, such as "/job:ps" which are not present. - config=tf.contrib.learn.RunConfig( - num_ps_replicas=2, cluster_spec=tf.train.ClusterSpec({}), - tf_random_seed=5)) - - # Test that the model actually trains. - classifier.fit(input_fn=_input_fn, steps=50) - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.6) - self.assertGreater(evaluate_output['recall_at_1'], 0.6) - - # Test the output of predict() - predict_output = classifier.predict(input_fn=_input_fn) - self.assertListEqual([3], list(predict_output.shape)) - # TODO(dnivara): Setup this test such that it is not flaky and predict() and - # evaluate() outputs can be tested. 
- - def testTrainSaveLoad(self): - """Tests that ensure that you can save and reload a trained model.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=10) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - model_dir = tempfile.mkdtemp() - classifier1 = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - model_dir=model_dir, - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4]) - - classifier1.fit(input_fn=_input_fn, steps=1) - predict_output1 = classifier1.predict(input_fn=_input_fn) - del classifier1 - - classifier2 = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - model_dir=model_dir, - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4]) - - predict_output2 = classifier2.predict(input_fn=_input_fn) - self.assertEqual(list(predict_output1), list(predict_output2)) - - def testCustomOptimizerByObject(self): - """Tests the use of custom optimizer.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=tf.train.AdamOptimizer(learning_rate=0.01), - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - # Test that the model actually trains. 
- classifier.fit(input_fn=_input_fn, steps=50) - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.9) - self.assertGreater(evaluate_output['recall_at_1'], 0.9) - - # Test the output of predict() - predict_output = classifier.predict(input_fn=_input_fn) - self.assertListEqual([1, 0, 0], list(predict_output)) - - def testCustomOptimizerByFunction(self): - """Tests the use of custom optimizer.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - def _optimizer_exp_decay(): - global_step = tf.contrib.framework.get_global_step() - learning_rate = tf.train.exponential_decay(learning_rate=0.01, - global_step=global_step, - decay_steps=100, - decay_rate=0.001) - return tf.train.AdagradOptimizer(learning_rate=learning_rate) - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=_optimizer_exp_decay, - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - # Test that the model actually trains. - classifier.fit(input_fn=_input_fn, steps=50) - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.6) - self.assertGreater(evaluate_output['recall_at_1'], 0.6) - - def testExport(self): - """Tests that export model for servo works.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=100) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4]) - - export_dir = tempfile.mkdtemp() - classifier.fit(input_fn=_input_fn, steps=50) - classifier.export(export_dir) - - def testPredictAsIterable(self): - """Tests predict() and predict_proba() call with as_iterable set to True.""" - def _input_fn(num_epochs=None): - features = { - 'age': tf.train.limit_epochs(tf.constant([[.9], [.1], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - feature_columns = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1), - tf.contrib.layers.real_valued_column('age') - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=feature_columns, - hidden_units=[4, 4]) - - classifier.fit(input_fn=_input_fn, steps=1) - - predict_input_fn = functools.partial(_input_fn, num_epochs=1) - # Test the output of predict() 
and predict_proba() with as_iterable=True - predictions = list( - classifier.predict(input_fn=predict_input_fn, as_iterable=True)) - predictions_proba = list( - classifier.predict_proba(input_fn=predict_input_fn, as_iterable=True)) - self.assertTrue(np.array_equal(predictions, - np.argmax(predictions_proba, 1))) - - def testCustomMetrics(self): - """Tests the use of custom metric.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - def _my_metric_op(predictions, targets): - """Simply multiplies predictions and targets to return [1, 0 , 0].""" - prediction_classes = math_ops.argmax(predictions, 1) - return tf.mul(prediction_classes, tf.reshape(targets, [-1])) - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=tf.train.AdamOptimizer(learning_rate=0.01), - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - # Test that the model actually trains. - classifier.fit(input_fn=_input_fn, steps=50) - metrics = {('my_metric', 'probabilities'): _my_metric_op} - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1, - metrics=metrics) - self.assertListEqual([1, 0, 0], list(evaluate_output['my_metric'])) - - def testMultiLabelTopKWithCustomMetrics(self): - """Tests the cases where n_labels>1 top_k>1 and custom metrics on top_k.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[0, 1], [0, 1], [0, 1]], dtype=tf.int64) - return features, target - - def _my_metric_op(predictions, targets): - """Simply adds the predictions and targets.""" - return tf.add(math_ops.to_int64(predictions), targets) - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=2, - top_k=2, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=tf.train.AdamOptimizer(learning_rate=0.01), - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - classifier.fit(input_fn=_input_fn, steps=50) - # evaluate() without custom metrics. - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.4) - self.assertGreater(evaluate_output['recall_at_1'], 0.4) - self.assertGreater(evaluate_output['precision_at_2'], 0.4) - self.assertGreater(evaluate_output['recall_at_2'], 0.4) - self.assertGreater(evaluate_output['average_precision_at_2'], 0.4) - - # evaluate() with custom metrics. - metrics = {('my_metric', 'top_k'): _my_metric_op} - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1, - metrics=metrics) - # This test's output is flaky so just testing that 'my_metric' is indeed - # part of the evaluate_output. - self.assertTrue('my_metric' in evaluate_output) - - # predict() with top_k. 
- predict_output = classifier.predict(input_fn=_input_fn, get_top_k=True) - self.assertListEqual([3, 2], list(predict_output.shape)) - # TODO(dnivara): Setup this test such that it is not flaky and predict() and - # evaluate() outputs can be tested. - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py index 7bd013baa8a..c09b2d682be 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py @@ -114,11 +114,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests binary classification using tensor data as input.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [0.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -149,11 +151,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests binary classification with float labels.""" def _input_fn_float_label(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[50], [20], [10]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[50], [20], [10]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } target = tf.constant([[0.8], [0.], [0.2]], dtype=tf.float32) return features, target @@ -334,11 +338,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests predict and predict_prob methods with as_iterable=False.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -370,11 +376,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests predict and predict_prob methods with as_iterable=True.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -407,15 +415,19 @@ class DNNClassifierTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + 
def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = { + 'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs), + } return features, target def _my_metric_op(predictions, targets): # For the case of binary classification, the 2nd column of "predictions" # denotes the model predictions. + targets = tf.to_float(targets) predictions = tf.slice(predictions, [0, 1], [-1, 1]) targets = math_ops.cast(targets, predictions.dtype) return tf.reduce_sum(tf.mul(predictions, targets)) @@ -425,9 +437,9 @@ class DNNClassifierTest(tf.test.TestCase): hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn_train, steps=100) + classifier.fit(input_fn=_input_fn, steps=100) scores = classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'my_accuracy': MetricSpec( @@ -443,7 +455,8 @@ class DNNClassifierTest(tf.test.TestCase): self.assertTrue( set(['loss', 'my_accuracy', 'my_precision', 'my_metric' ]).issubset(set(scores.keys()))) - predictions = classifier.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(classifier.predict(input_fn=predict_input_fn))) self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions), scores['my_accuracy']) @@ -451,7 +464,7 @@ class DNNClassifierTest(tf.test.TestCase): # "probabilities". with self.assertRaisesRegexp(KeyError, 'bad_type'): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'bad_name': MetricSpec( @@ -462,11 +475,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests that insures you can save and reload a trained model.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -485,7 +500,8 @@ class DNNClassifierTest(tf.test.TestCase): config=tf.contrib.learn.RunConfig(tf_random_seed=1)) classifier.fit(input_fn=_input_fn, steps=100) - predictions1 = classifier.predict(input_fn=_input_fn) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions1 = classifier.predict(input_fn=predict_input_fn) del classifier classifier2 = tf.contrib.learn.DNNClassifier( @@ -494,18 +510,20 @@ class DNNClassifierTest(tf.test.TestCase): feature_columns=feature_columns, hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - predictions2 = classifier2.predict(input_fn=_input_fn) + predictions2 = classifier2.predict(input_fn=predict_input_fn) self.assertEqual(list(predictions1), list(predictions2)) def testTrainWithPartitionedVariables(self): """Tests training with partitioned variables.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': 
tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -636,11 +654,13 @@ class DNNRegressorTest(tf.test.TestCase): """Tests regression using tensor data as input.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) @@ -756,11 +776,13 @@ class DNNRegressorTest(tf.test.TestCase): target = [1., 0., 0.2] def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant(target, dtype=tf.float32) @@ -788,11 +810,13 @@ class DNNRegressorTest(tf.test.TestCase): target = [1., 0., 0.2] def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant(target, dtype=tf.float32) @@ -819,10 +843,13 @@ class DNNRegressorTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1.], [0.], [0.], [0.]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = { + 'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs), + } return features, target def _my_metric_op(predictions, targets): @@ -833,9 +860,9 @@ class DNNRegressorTest(tf.test.TestCase): hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - regressor.fit(input_fn=_input_fn_train, steps=100) + regressor.fit(input_fn=_input_fn, steps=100) scores = regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={ 'my_error': tf.contrib.metrics.streaming_mean_squared_error, @@ -844,28 +871,31 @@ class DNNRegressorTest(tf.test.TestCase): self.assertIn('loss', set(scores.keys())) self.assertIn('my_error', set(scores.keys())) self.assertIn('my_metric', set(scores.keys())) - predictions = regressor.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = 
np.array(list(regressor.predict(input_fn=predict_input_fn))) self.assertAlmostEqual( _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), scores['my_error']) # Tests that when the key is a tuple, an error is raised. - with self.assertRaises(TypeError): + with self.assertRaises(KeyError): regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, - metrics={('my_error', 'predictions' - ): tf.contrib.metrics.streaming_mean_squared_error}) + metrics={('my_error', 'predictions'): + tf.contrib.metrics.streaming_mean_squared_error}) def testTrainSaveLoad(self): """Tests that insures you can save and reload a trained model.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) @@ -900,11 +930,13 @@ class DNNRegressorTest(tf.test.TestCase): """Tests training with partitioned variables.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) @@ -936,11 +968,13 @@ class DNNRegressorTest(tf.test.TestCase): """Tests that we can disable centered bias.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index a38d57effa7..249ec2ca391 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -27,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn @@ -119,6 +120,55 @@ def _select_last_activations(activations, sequence_lengths): return last_activations +def _concatenate_context_input(sequence_input, context_input): + """Replicates `context_input` accross all 
timesteps of `sequence_input`. + + Expands dimension 1 of `context_input` then tiles it `sequence_length` times. + This value is appended to `sequence_input` on dimension 2 and the result is + returned. + + Args: + sequence_input: a `Tensor` of dtype `float32` and shape `[batch_size, + padded_length, d0]`. + context_input: a `Tensor` of dtype `float32` and shape `[batch_size, d1]`. + + Returns: + A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, + d0 + d1]`. + + Raises: + ValueError: if `sequence_input` does not have rank 3 or `context_input` does + not have rank 2. + """ + seq_rank_check = check_ops.assert_rank( + sequence_input, + 3, + message='sequence_input must have rank 3', + data=[array_ops.shape(sequence_input)]) + seq_type_check = check_ops.assert_type( + sequence_input, + dtypes.float32, + message='sequence_input must have dtype float32; got {}.'.format( + sequence_input.dtype)) + ctx_rank_check = check_ops.assert_rank( + context_input, + 2, + message='context_input must have rank 2', + data=[array_ops.shape(context_input)]) + ctx_type_check = check_ops.assert_type( + context_input, + dtypes.float32, + message='context_input must have dtype float32; got {}.'.format( + context_input.dtype)) + with ops.control_dependencies( + [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): + padded_length = array_ops.shape(sequence_input)[1] + tiled_context_input = array_ops.tile( + array_ops.expand_dims(context_input, 1), + array_ops.concat(0, [[1], [padded_length], [1]])) + return array_ops.concat(2, [sequence_input, tiled_context_input]) + + @six.add_metaclass(abc.ABCMeta) class _DynamicRNNEstimator(estimator.BaseEstimator): """Estimator that uses a dynamic RNN for sequences.""" @@ -127,10 +177,11 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns=None, model_dir=None, config=None, gradient_clipping_norm=None, - inputs_key='inputs', sequence_length_key='sequence_length', initial_state_key='initial_state', dtype=None, @@ -145,13 +196,18 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): target_column: an initialized `TargetColumn`, used to calculate loss and metrics. optimizer: an initialized `tensorflow.Optimizer`. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. model_dir: The directory in which to save and restore the model graph, parameters, etc. config: A `RunConfig` instance. gradient_clipping_norm: parameter used for gradient clipping. If `None`, then no clipping is performed. - inputs_key: the key for input values in the features dict passed to - `fit()`. sequence_length_key: the key for the sequence length tensor in the features dict passed to `fit()`. initial_state_key: the key for input values in the features dict passed to @@ -168,14 +224,20 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): targets which are the output of `input_fn` and returns features and targets which will be fed into the model. + Raises: + ValueError: `sequence_feature_columns` is `None` or []. 
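For illustration only (not part of the patch): with `sequence_feature_columns` now required, callers build these estimators from feature columns rather than a raw `inputs` tensor. A minimal sketch using the same contrib.layers column constructors exercised by the tests further down; the feature names ('measurements', 'wire_cast', 'location'), vocabularies, and sizes are placeholders, not values mandated by this change:

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import dynamic_rnn_estimator

# Sequence features: one value per timestep of each example.
measurements = tf.contrib.layers.real_valued_column('measurements', dimension=2)
wire_cast = tf.contrib.layers.sparse_column_with_keys(
    'wire_cast', keys=['marlo', 'omar', 'stringer'])
seq_columns = [measurements,
               tf.contrib.layers.embedding_column(wire_cast, dimension=8)]

# Context features: one value per example, tiled across all timesteps.
location = tf.contrib.layers.sparse_column_with_keys(
    'location', keys=['west_side', 'east_side', 'nyc'])
context_columns = [tf.contrib.layers.one_hot_column(location)]

regressor = dynamic_rnn_estimator.multi_value_rnn_regressor(
    num_units=8,
    sequence_feature_columns=seq_columns,
    context_feature_columns=context_columns,
    learning_rate=0.1)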
""" super(_DynamicRNNEstimator, self).__init__( model_dir=model_dir, config=config) + # TODO(jamieas): consider supporting models with only context features. + if not sequence_feature_columns: + raise ValueError('sequence_feature_columns must be a non-empty list.') self._cell = cell self._target_column = target_column self._optimizer = optimizer + self._context_feature_columns = context_feature_columns + self._sequence_feature_columns = sequence_feature_columns self._gradient_clipping_norm = gradient_clipping_norm - self._inputs_key = inputs_key self._sequence_length_key = sequence_length_key self._initial_state_key = initial_state_key self._dtype = dtype or dtypes.float32 @@ -186,7 +248,29 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): feature_engineering_fn or (lambda features, targets: (features, targets))) - def _construct_rnn(self, features): + def _get_model_input(self, features, weight_collections=None, scope=None): + # TODO(jamieas): add option to use context to construct initial state rather + # than appending it to sequence input. + initial_state = features.get(self._initial_state_key) + + sequence_input = layers.sequence_input_from_feature_columns( + columns_to_tensors=features, + feature_columns=self._sequence_feature_columns, + weight_collections=weight_collections, + scope=scope) + + if self._context_feature_columns is not None: + context_input = layers.input_from_feature_columns( + columns_to_tensors=features, + feature_columns=self._context_feature_columns, + weight_collections=weight_collections, + scope=scope) + + sequence_input = _concatenate_context_input(sequence_input, context_input) + + return initial_state, sequence_input + + def _construct_rnn(self, initial_state, sequence_input): """Apply an RNN to `features`. The `features` dict must contain `self._inputs_key`, and the corresponding @@ -201,28 +285,20 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): `self._dtype`. Args: - features: a `dict` containing the input for the RNN and (optionally) an - initial state and information about sequence lengths. + initial_state: the initial state to pass the the RNN. If `None`, the + default starting state for `self._cell` is used. + sequence_input: a `Tensor` with shape `[batch_size, padded_length, d]` + that will be passed as input to the RNN. Returns: activations: the output of the RNN, projected to the appropriate number of dimensions. final_state: the final state output by the RNN. - - Raises: - KeyError: if `features` does not contain `self._inputs_key`. 
""" with ops.name_scope('RNN'): - inputs = features.get(self._inputs_key) - if inputs is None: - raise KeyError('features must contain the key {}'.format( - self._inputs_key)) - if inputs.dtype != self._dtype: - inputs = math_ops.cast(inputs, self._dtype) - initial_state = features.get(self._initial_state_key) rnn_outputs, final_state = rnn.dynamic_rnn( cell=self._cell, - inputs=inputs, + inputs=sequence_input, initial_state=initial_state, dtype=self._dtype, parallel_iterations=self._parallel_iterations, @@ -320,26 +396,26 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): def _get_train_ops(self, features, targets): with ops.name_scope(self._name): - if isinstance(features, ops.Tensor): - features = {self._inputs_key: features} - activations, _ = self._construct_rnn(features) + features, targets = self._feature_engineering_fn(features, targets) + initial_state, sequence_input = self._get_model_input(features) + activations, _ = self._construct_rnn(initial_state, sequence_input) loss = self._activations_to_loss(features, activations, targets) train_op = self._loss_to_train_op(loss) return train_op, loss def _get_eval_ops(self, features, targets, metrics): with ops.name_scope(self._name): - if isinstance(features, ops.Tensor): - features = {self._inputs_key: features} - activations, _ = self._construct_rnn(features) + features, targets = self._feature_engineering_fn(features, targets) + initial_state, sequence_input = self._get_model_input(features) + activations, _ = self._construct_rnn(initial_state, sequence_input) return self._activations_to_eval_ops(features, activations, targets, metrics) def _get_predict_ops(self, features): with ops.name_scope(self._name): - if isinstance(features, ops.Tensor): - features = {self._inputs_key: features} - activations, state = self._construct_rnn(features) + features, _ = self._feature_engineering_fn(features, {}) + initial_state, sequence_input = self._get_model_input(features) + activations, state = self._construct_rnn(initial_state, sequence_input) predictions = self._activations_to_predictions(features, activations) return {'predictions': predictions, 'state': state} @@ -362,7 +438,7 @@ class _MultiValueRNNEstimator(_DynamicRNNEstimator): activations_shape = array_ops.shape(activations) flattened_activations = array_ops.reshape(activations, [-1, activations_shape[2]]) - predictions = self._target_column.activations_to_predictions( + predictions = self._target_column.logits_to_predictions( flattened_activations, proba=False) reshaped_predictions = array_ops.reshape( predictions, [activations_shape[0], activations_shape[1], -1]) @@ -392,7 +468,7 @@ class _SingleValueRNNEstimator(_DynamicRNNEstimator): with ops.name_scope('activations_to_predictions'): sequence_lengths = features.get(self._sequence_length_key) last_activations = _select_last_activations(activations, sequence_lengths) - return self._target_column.activations_to_predictions( + return self._target_column.logits_to_predictions( last_activations, proba=False) def _activations_to_eval_ops(self, features, activations, targets, metrics): @@ -469,6 +545,8 @@ def _get_rnn_cell(cell_type, num_units, num_layers): def multi_value_rnn_regressor(num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -482,6 +560,13 @@ def multi_value_rnn_regressor(num_units, Args: num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. 
All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -503,6 +588,8 @@ def multi_value_rnn_regressor(num_units, return _MultiValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, @@ -511,6 +598,8 @@ def multi_value_rnn_regressor(num_units, def multi_value_rnn_classifier(num_classes, num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -525,6 +614,13 @@ def multi_value_rnn_classifier(num_classes, Args: num_classes: the number of classes for categorization. num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -546,6 +642,8 @@ def multi_value_rnn_classifier(num_classes, return _MultiValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, @@ -553,6 +651,8 @@ def multi_value_rnn_classifier(num_classes, def single_value_rnn_regressor(num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -566,6 +666,13 @@ def single_value_rnn_regressor(num_units, Args: num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -587,6 +694,8 @@ def single_value_rnn_regressor(num_units, return _SingleValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, @@ -595,6 +704,8 @@ def single_value_rnn_regressor(num_units, def single_value_rnn_classifier(num_classes, num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -609,6 +720,13 @@ def single_value_rnn_classifier(num_classes, Args: num_classes: the number of classes for categorization. 
num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -630,6 +748,8 @@ def single_value_rnn_classifier(num_classes, return _SingleValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py index 1ee3a8dd608..f14e65fff55 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py @@ -22,6 +22,7 @@ import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python.learn.estimators import dynamic_rnn_estimator +from tensorflow.python.ops import rnn_cell class IdentityRNNCell(tf.nn.rnn_cell.RNNCell): @@ -44,16 +45,16 @@ class IdentityRNNCell(tf.nn.rnn_cell.RNNCell): class MockTargetColumn(object): - def __init__(self): - self._num_label_columns = None + def __init__(self, num_label_columns=None): + self._num_label_columns = num_label_columns def get_eval_ops(self, features, activations, targets, metrics): raise NotImplementedError( 'MockTargetColumn.get_eval_ops called unexpectedly.') - def activations_to_predictions(self, flattened_activations, proba=False): + def logits_to_predictions(self, flattened_activations, proba=False): raise NotImplementedError( - 'MockTargetColumn.activations_to_predictions called unexpectedly.') + 'MockTargetColumn.logits_to_predictions called unexpectedly.') def loss(self, activations, targets, features): raise NotImplementedError('MockTargetColumn.loss called unexpectedly.') @@ -88,67 +89,89 @@ def sequence_length_mask(values, lengths): class DynamicRnnEstimatorTest(tf.test.TestCase): - CELL_STATE_SIZE = 8 - CELL_OUTPUT_SIZE = 6 + NUM_RNN_CELL_UNITS = 8 + NUM_LABEL_COLUMNS = 4 def setUp(self): - self._rnn_cell = IdentityRNNCell(self.CELL_STATE_SIZE, - self.CELL_OUTPUT_SIZE) - self._mock_target_column = MockTargetColumn() + self._rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS) + self._mock_target_column = MockTargetColumn( + num_label_columns=self.NUM_LABEL_COLUMNS) + + location = tf.contrib.layers.sparse_column_with_keys( + 'location', keys=['west_side', 'east_side', 'nyc']) + location_onehot = tf.contrib.layers.one_hot_column(location) + context_features = [location_onehot] + + wire_cast = tf.contrib.layers.sparse_column_with_keys( + 'wire_cast', ['marlo', 'omar', 'stringer']) + wire_cast_embedded = tf.contrib.layers.embedding_column( + wire_cast, dimension=8) + measurements = tf.contrib.layers.real_valued_column( + 'measurements', dimension=2) + sequence_features = [measurements, wire_cast_embedded] + self._rnn_estimator = dynamic_rnn_estimator._MultiValueRNNEstimator( cell=self._rnn_cell, + sequence_feature_columns=sequence_features, + context_feature_columns=context_features, 
target_column=self._mock_target_column, optimizer=tf.train.GradientDescentOptimizer(0.1)) + self._columns_to_tensors = { + 'location': tf.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0]], + values=['west_side', 'west_side', 'nyc'], + shape=[3, 1]), + 'wire_cast': tf.SparseTensor( + indices=[[0, 0, 0], [0, 1, 0], + [1, 0, 0], [1, 1, 0], [1, 1, 1], + [2, 0, 0]], + values=[b'marlo', b'stringer', + b'omar', b'stringer', b'marlo', + b'marlo'], + shape=[3, 2, 2]), + 'measurements': tf.random_uniform([3, 2, 2])} + + def testGetModelInput(self): + initial_state, sequence_input = self._rnn_estimator._get_model_input( + self._columns_to_tensors) + self.assertIsNone(initial_state) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + sess.run(tf.initialize_all_tables()) + sequence_input_val = sess.run(sequence_input) + expected_shape = np.array([ + 3, # expected batch size + 2, # padded sequence length + 3 + 8 + 2 # location keys + embedding dim + measurement dimension + ]) + self.assertAllEqual(expected_shape, sequence_input_val.shape) + def testConstructRNN(self): """Test `DynamicRNNEstimator._construct_rnn`.""" - batch_size = 4 - padded_length = 6 - num_classes = 4 + initial_state, sequence_input = self._rnn_estimator._get_model_input( + self._columns_to_tensors) + activations_t, final_state_t = self._rnn_estimator._construct_rnn( + initial_state, sequence_input) - # Set up mocks - self._mock_target_column.set_num_label_columns(num_classes) - np.random.seed(111) - mock_linear_layer_output = np.random.rand( - batch_size, padded_length, num_classes) - - # Create features - inputs = np.random.rand(batch_size, padded_length, self.CELL_OUTPUT_SIZE) - sequence_length = np.random.randint(0, padded_length + 1, batch_size) - features = {'inputs': tf.constant( - inputs, dtype=tf.float32), - 'sequence_length': tf.constant( - sequence_length, dtype=tf.int32)} - - # Map feature to activations with mocked linear layer. - with tf.test.mock.patch.object(dynamic_rnn_estimator, - 'layers') as mock_layers: - mock_layers.fully_connected.return_value = tf.constant( - mock_linear_layer_output, dtype=tf.float32) - activations_t, final_state_t = self._rnn_estimator._construct_rnn( - features) - _, fully_connected_kwargs = mock_layers.fully_connected.call_args - linear_layer_inputs_t = fully_connected_kwargs['inputs'] - linear_layer_output_dim = fully_connected_kwargs['num_outputs'] - - # Obtain values of linear layer input, activations and final state. + # Obtain values of activations and final state. 
with tf.Session() as sess: sess.run(tf.initialize_all_variables()) - linear_layer_inputs, activations, final_state = sess.run( - [linear_layer_inputs_t, activations_t, final_state_t]) + sess.run(tf.initialize_all_tables()) + activations, final_state = sess.run([activations_t, final_state_t]) - np.testing.assert_equal(num_classes, linear_layer_output_dim) - np.testing.assert_almost_equal(inputs, linear_layer_inputs) - np.testing.assert_almost_equal(mock_linear_layer_output, activations) - np.testing.assert_almost_equal( - np.zeros([batch_size, self._rnn_cell.state_size], dtype=float), - final_state) + expected_activations_shape = np.array([3, 2, self.NUM_LABEL_COLUMNS]) + self.assertAllEqual(expected_activations_shape, activations.shape) + expected_state_shape = np.array([3, self.NUM_RNN_CELL_UNITS]) + self.assertAllEqual(expected_state_shape, final_state.shape) class MultiValueRNNEstimatorTest(tf.test.TestCase): """Tests for `_MultiValueRNNEstimator` class.""" CELL_STATE_SIZE = 8 CELL_OUTPUT_SIZE = 6 + INPUTS_COLUMN = tf.contrib.layers.real_valued_column( + 'inputs', dimension=CELL_OUTPUT_SIZE) def setUp(self): self._rnn_cell = IdentityRNNCell(self.CELL_STATE_SIZE, @@ -156,6 +179,7 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): self._mock_target_column = MockTargetColumn() self._seq_estimator = dynamic_rnn_estimator._MultiValueRNNEstimator( cell=self._rnn_cell, + sequence_feature_columns=[self.INPUTS_COLUMN], target_column=self._mock_target_column, optimizer=tf.train.GradientDescentOptimizer(0.1)) @@ -251,13 +275,13 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): with tf.test.mock.patch.object( self._mock_target_column, - 'activations_to_predictions', + 'logits_to_predictions', return_value=flattened_argmax, - autospec=True) as mock_activations_to_predictions: + autospec=True) as mock_logits_to_predictions: predictions_t = self._seq_estimator._activations_to_predictions( None, tf.constant(activations, dtype=tf.float32)) (target_column_input_activations_t, - ), _ = mock_activations_to_predictions.call_args + ), _ = mock_logits_to_predictions.call_args with tf.Session() as sess: target_column_input_activations, predictions = sess.run( @@ -294,9 +318,14 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=1234) sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_regressor( - num_units=cell_size, learning_rate=learning_rate, config=config) + num_units=cell_size, + sequence_feature_columns=seq_columns, + learning_rate=learning_rate, + config=config) train_input_fn = get_sin_input_fn( batch_size, sequence_length, np.pi / 32, seed=1234) @@ -336,10 +365,13 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): return {'inputs': inputs}, labels return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=21212) sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_classifier( num_classes=2, num_units=cell_size, + sequence_feature_columns=seq_columns, learning_rate=learning_rate, config=config) @@ -421,9 +453,12 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase): return {'inputs': inputs}, labels return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=6) sequence_regressor = 
dynamic_rnn_estimator.single_value_rnn_regressor( num_units=cell_size, + sequence_feature_columns=seq_columns, cell_type=cell_type, optimizer_type=optimizer_type, learning_rate=learning_rate, @@ -467,10 +502,13 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase): return {'inputs': inputs}, labels return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=77) sequence_classifier = dynamic_rnn_estimator.single_value_rnn_classifier( num_classes=2, num_units=cell_size, + sequence_feature_columns=seq_columns, cell_type=cell_type, optimizer_type=optimizer_type, learning_rate=learning_rate, diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 99afefe084e..cd8c12d3044 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import abc +import collections import copy import inspect import itertools @@ -52,6 +53,8 @@ from tensorflow.contrib.learn.python.learn.utils import export from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import device_setter @@ -81,6 +84,12 @@ class ModeKeys(object): INFER = 'infer' +class ModelFnOps( + collections.namedtuple('ModelFnOps', ['predictions', 'loss', 'training_op', + 'default_metrics', 'signature_fn'])): + pass + + def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1): """Make inputs into input and feed functions.""" if input_fn is None: @@ -230,6 +239,9 @@ def _make_metrics_ops(metrics, features, targets, predictions): if isinstance(name, tuple): # Multi-head metrics. + if len(name) != 2: + raise ValueError('Invalid metric for {}. It returned a tuple with ' + 'len {}, expected 2.'.format(name, len(name))) if not isinstance(predictions, dict): raise ValueError( 'Metrics passed provide (name, prediction), ' @@ -371,7 +383,7 @@ class BaseEstimator( provided. """ logging.warning('The current implementation of partial_fit is not optimized' - 'for use in a loop. Consider using fit() instead.') + ' for use in a loop. Consider using fit() instead.') return self.fit(x=x, y=y, input_fn=input_fn, steps=steps, batch_size=batch_size, monitors=monitors) @@ -405,7 +417,7 @@ class BaseEstimator( AS_ITERABLE_DATE, AS_ITERABLE_INSTRUCTIONS, as_iterable=False) def predict( self, x=None, input_fn=None, batch_size=None, outputs=None, - as_iterable=False): + as_iterable=True): """Returns predictions for given features. Args: @@ -602,26 +614,26 @@ class BaseEstimator( def _check_inputs(self, features, targets): if self._features_info is not None: - logging.warning('Given features: %s, required signatures: %s.', - str(features), str(self._features_info)) + logging.debug('Given features: %s, required signatures: %s.', + str(features), str(self._features_info)) if not tensor_signature.tensors_compatible(features, self._features_info): raise ValueError('Features are incompatible with given information. ' 'Given features: %s, required signatures: %s.' 
% (str(features), str(self._features_info))) else: self._features_info = tensor_signature.create_signatures(features) - logging.info('Setting feature info to %s', str(self._features_info)) + logging.debug('Setting feature info to %s.', str(self._features_info)) if targets is not None: if self._targets_info is not None: - logging.warning('Given targets: %s, required signatures: %s.', - str(targets), str(self._targets_info)) + logging.debug('Given targets: %s, required signatures: %s.', + str(targets), str(self._targets_info)) if not tensor_signature.tensors_compatible(targets, self._targets_info): raise ValueError('Targets are incompatible with given information. ' 'Given targets: %s, required signatures: %s.' % (str(targets), str(self._targets_info))) else: self._targets_info = tensor_signature.create_signatures(targets) - logging.info('Setting targets info to %s', str(self._targets_info)) + logging.debug('Setting targets info to %s', str(self._targets_info)) def _train_model(self, input_fn, @@ -781,7 +793,7 @@ class BaseEstimator( return result def _infer_model( - self, input_fn, feed_fn=None, outputs=None, as_iterable=False): + self, input_fn, feed_fn=None, outputs=None, as_iterable=True): # Check that model has been trained. checkpoint_path = saver.latest_checkpoint(self._model_dir) if not checkpoint_path: @@ -883,8 +895,15 @@ class Estimator(BaseEstimator): Args: model_fn: Model function, takes features and targets tensors or dicts of - tensors and returns predictions and loss tensors. - Supports next three signatures for the function: + tensors and returns tuple of: + + * predictions: `Tensor`, `SparseTensor` or dictionary of same. + Can also be any type that is convertible to a `Tensor` or + `SparseTensor`, or dictionary of same. + * loss: Scalar loss `Tensor`. + * train_op: Training update `Tensor` or `Operation`. + + Supports next three signatures for the function: * `(features, targets) -> (predictions, loss, train_op)` * `(features, targets, mode) -> (predictions, loss, train_op)` @@ -929,7 +948,7 @@ class Estimator(BaseEstimator): 'arguments, but not None params (%s) are passed.' % (model_fn, params)) if params is None and 'params' in model_fn_args: - logging.warning('Estimator\'s model_fn (%s) has includes params ' + logging.warning('Estimator\'s model_fn (%s) includes params ' 'argument, but params are not passed to Estimator.', model_fn) self._model_fn = model_fn @@ -943,10 +962,48 @@ class Estimator(BaseEstimator): model_fn_args = _get_arguments(self._model_fn) if 'mode' in model_fn_args: if 'params' in model_fn_args: - return self._model_fn(features, targets, mode=mode, params=self.params) + predictions, loss, train_op = self._model_fn( + features, targets, mode=mode, params=self.params) else: - return self._model_fn(features, targets, mode=mode) - return self._model_fn(features, targets) + predictions, loss, train_op = self._model_fn( + features, targets, mode=mode) + else: + predictions, loss, train_op = self._model_fn(features, targets) + + # Validate train_op. + if train_op is None: + if mode == ModeKeys.TRAIN: + raise ValueError('Missing train_op.') + elif not isinstance(train_op, ops.Operation): + train_op = ops.convert_to_tensor(train_op).op + + # Validate loss. + if loss is None: + if mode in (ModeKeys.TRAIN, ModeKeys.EVAL): + raise ValueError('Missing loss.') + else: + loss = ops.convert_to_tensor(loss) + loss_shape = loss.get_shape() + if loss_shape.num_elements() not in (None, 1): + raise ValueError('Loss must be scalar: %s.' 
% loss) + if not loss_shape.is_compatible_with(tensor_shape.scalar()): + loss = array_ops.reshape(loss, []) + + # Validate predictions. + if predictions is None: + if mode == ModeKeys.INFER: + raise ValueError('Missing predictions.') + else: + if isinstance(predictions, dict): + predictions = { + k: contrib_framework.convert_to_tensor_or_sparse_tensor(v) + for k, v in six.iteritems(predictions) + } + else: + predictions = contrib_framework.convert_to_tensor_or_sparse_tensor( + predictions) + + return predictions, loss, train_op def _get_train_ops(self, features, targets): """Method that builds model graph and returns trainer ops. diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py index 0acdbf20c3e..7e36ed078f9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py @@ -37,9 +37,9 @@ _IRIS_INPUT_DIM = 4 def boston_input_fn(num_epochs=None): boston = tf.contrib.learn.datasets.load_boston() - features = tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM]) - if num_epochs: - features = tf.train.limit_epochs(features, num_epochs=num_epochs) + features = tf.train.limit_epochs( + tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM]), + num_epochs=num_epochs) target = tf.reshape(tf.constant(boston.target), [-1, 1]) return features, target @@ -61,7 +61,10 @@ def boston_eval_fn(): def linear_model_params_fn(features, target, mode, params): - assert mode in ('train', 'eval', 'infer') + assert mode in ( + tf.contrib.learn.ModeKeys.TRAIN, + tf.contrib.learn.ModeKeys.EVAL, + tf.contrib.learn.ModeKeys.INFER) prediction, loss = ( tf.contrib.learn.models.linear_regression_zero_init(features, target) ) @@ -72,7 +75,10 @@ def linear_model_params_fn(features, target, mode, params): def linear_model_fn(features, target, mode): - assert mode in ('train', 'eval', 'infer') + assert mode in ( + tf.contrib.learn.ModeKeys.TRAIN, + tf.contrib.learn.ModeKeys.EVAL, + tf.contrib.learn.ModeKeys.INFER) prediction, loss = ( tf.contrib.learn.models.linear_regression_zero_init(features, target) ) @@ -120,6 +126,46 @@ class CheckCallsMonitor(tf.contrib.learn.monitors.BaseMonitor): class EstimatorTest(tf.test.TestCase): + def testInvalidModelFn_no_train_op(self): + def _invalid_model_fn(features, target): + # pylint: disable=unused-argument + tf.Variable(42.0, 'weight') + return None, None, None + est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn) + with self.assertRaisesRegexp(ValueError, 'Missing train_op'): + est.fit(input_fn=boston_input_fn, steps=1) + + def testInvalidModelFn_no_loss(self): + def _invalid_model_fn(features, target, mode): + # pylint: disable=unused-argument + w = tf.Variable(42.0, 'weight') + loss = 100.0 - w + train_op = w.assign_add(loss / 100.0) + if mode == tf.contrib.learn.ModeKeys.EVAL: + loss = None + return None, loss, train_op + est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn) + est.fit(input_fn=boston_input_fn, steps=1) + with self.assertRaisesRegexp(ValueError, 'Missing loss'): + est.evaluate(input_fn=boston_eval_fn, steps=1) + + def testInvalidModelFn_no_prediction(self): + def _invalid_model_fn(features, target): + # pylint: disable=unused-argument + w = tf.Variable(42.0, 'weight') + loss = 100.0 - w + train_op = w.assign_add(loss / 100.0) + return None, loss, train_op + est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn) + 
est.fit(input_fn=boston_input_fn, steps=1) + est.evaluate(input_fn=boston_eval_fn, steps=1) + with self.assertRaisesRegexp(ValueError, 'Missing prediction'): + est.predict(input_fn=boston_input_fn) + with self.assertRaisesRegexp(ValueError, 'Missing prediction'): + est.predict( + input_fn=functools.partial(boston_input_fn, num_epochs=1), + as_iterable=True) + def testCustomConfig(self): test_random_seed = 5783452 @@ -211,7 +257,7 @@ class EstimatorTest(tf.test.TestCase): metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error}) self.assertAllClose(scores2['MSE'], scores['MSE']) - predictions = est2.predict(x=boston.data) + predictions = np.array(list(est2.predict(x=boston.data))) other_score = _sklearn.mean_squared_error(predictions, float64_target) self.assertAllClose(other_score, scores['MSE']) @@ -238,7 +284,7 @@ class EstimatorTest(tf.test.TestCase): x=boston.data, y=float64_target, metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error}) - predictions = est.predict(x=boston.data) + predictions = np.array(list(est.predict(x=boston.data))) other_score = _sklearn.mean_squared_error(predictions, boston.target) self.assertAllClose(other_score, scores['MSE']) self.assertTrue('global_step' in scores) @@ -252,13 +298,17 @@ class EstimatorTest(tf.test.TestCase): x=iris.data, y=iris.target, metrics={('accuracy', 'class'): tf.contrib.metrics.streaming_accuracy}) - predictions = est.predict(x=iris.data) - predictions_class = est.predict(x=iris.data, outputs=['class']) - self.assertEqual(predictions['class'].shape[0], iris.target.shape[0]) - self.assertAllClose(predictions['class'], predictions_class['class']) - self.assertAllClose(predictions['class'], np.argmax(predictions['prob'], - axis=1)) - other_score = _sklearn.accuracy_score(iris.target, predictions['class']) + predictions = list(est.predict(x=iris.data)) + predictions_class = list(est.predict(x=iris.data, outputs=['class'])) + self.assertEqual(len(predictions), iris.target.shape[0]) + classes_batch = np.array([p['class'] for p in predictions]) + self.assertAllClose( + classes_batch, + np.array([p['class'] for p in predictions_class])) + self.assertAllClose( + classes_batch, + np.argmax(np.array([p['prob'] for p in predictions]), axis=1)) + other_score = _sklearn.accuracy_score(iris.target, classes_batch) self.assertAllClose(other_score, scores['accuracy']) self.assertTrue('global_step' in scores) self.assertEqual(scores['global_step'], 100) @@ -268,8 +318,8 @@ class EstimatorTest(tf.test.TestCase): est = tf.contrib.learn.Estimator(model_fn=logistic_model_no_mode_fn) est.fit(input_fn=iris_input_fn, steps=100) _ = est.evaluate(input_fn=iris_input_fn, steps=1) - predictions = est.predict(x=iris.data)['class'] - self.assertEqual(predictions.shape[0], iris.target.shape[0]) + predictions = list(est.predict(x=iris.data)) + self.assertEqual(len(predictions), iris.target.shape[0]) def testIrisIterator(self): iris = tf.contrib.learn.datasets.load_iris() @@ -278,8 +328,8 @@ class EstimatorTest(tf.test.TestCase): y_iter = itertools.islice(iris.target, 100) est.fit(x_iter, y_iter, steps=100) _ = est.evaluate(input_fn=iris_input_fn, steps=1) - predictions = est.predict(x=iris.data)['class'] - self.assertEqual(predictions.shape[0], iris.target.shape[0]) + predictions = list(est.predict(x=iris.data)) + self.assertEqual(len(predictions), iris.target.shape[0]) def testTrainInputFn(self): est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) @@ -304,32 +354,16 @@ class EstimatorTest(tf.test.TestCase): est = 
tf.contrib.learn.Estimator(model_fn=linear_model_fn) boston = tf.contrib.learn.datasets.load_boston() est.fit(input_fn=boston_input_fn, steps=1) - output = est.predict(boston.data) - self.assertEqual(output.shape[0], boston.target.shape[0]) + output = list(est.predict(x=boston.data, batch_size=10)) + self.assertEqual(len(output), boston.target.shape[0]) def testPredictInputFn(self): - est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) - boston = tf.contrib.learn.datasets.load_boston() - est.fit(input_fn=boston_input_fn, steps=1) - output = est.predict(input_fn=boston_input_fn) - self.assertEqual(output.shape[0], boston.target.shape[0]) - - def testPredictAsIterable(self): - est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) - boston = tf.contrib.learn.datasets.load_boston() - est.fit(input_fn=boston_input_fn, steps=1) - self.assertEqual( - len(list(est.predict(boston.data, batch_size=10, as_iterable=True))), - boston.target.shape[0]) - - def testPredictInputFnAsIterable(self): est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) boston = tf.contrib.learn.datasets.load_boston() est.fit(input_fn=boston_input_fn, steps=1) input_fn = functools.partial(boston_input_fn, num_epochs=1) - self.assertEqual( - len(list(est.predict(input_fn=input_fn, as_iterable=True))), - boston.target.shape[0]) + output = list(est.predict(input_fn=input_fn)) + self.assertEqual(len(output), boston.target.shape[0]) def testWrongInput(self): def other_input_fn(): diff --git a/tensorflow/contrib/learn/python/learn/tests/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py similarity index 78% rename from tensorflow/contrib/learn/python/learn/tests/estimators_test.py rename to tensorflow/contrib/learn/python/learn/estimators/estimators_test.py index 8862ba48cc0..35a6c5bf021 100644 --- a/tensorflow/contrib/learn/python/learn/tests/estimators_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import random +import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python import learn @@ -28,36 +29,6 @@ from tensorflow.contrib.learn.python.learn.estimators._sklearn import accuracy_s from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split -# TODO(b/29580537): Remove when we deprecate feature column inference. 
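Because _infer_model now defaults to as_iterable=True, predict() yields one result per example, which is why the test call sites above materialize the generator. A minimal call-site sketch, with est, x_data and my_input_fn standing in for an already fitted estimator and its inputs (placeholder names, not from this change):

import functools
import numpy as np

# Dense array of per-example predictions from in-memory data.
predictions = np.array(list(est.predict(x=x_data, batch_size=10)))

# With an input_fn, bound the epochs so the prediction generator terminates.
predict_input_fn = functools.partial(my_input_fn, num_epochs=1)
predictions = list(est.predict(input_fn=predict_input_fn))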
-class InferredfeatureColumnTest(tf.test.TestCase): - """Custom optimizer tests.""" - - def testIrisMomentum(self): - random.seed(42) - - iris = datasets.load_iris() - x_train, x_test, y_train, y_test = train_test_split(iris.data, - iris.target, - test_size=0.2, - random_state=42) - - def custom_optimizer(): - return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9) - - cont_features = [ - tf.contrib.layers.real_valued_column("", dimension=4)] - classifier = learn.DNNClassifier( - feature_columns=cont_features, - hidden_units=[10, 20, 10], - n_classes=3, - optimizer=custom_optimizer, - config=learn.RunConfig(tf_random_seed=1)) - classifier.fit(x_train, y_train, steps=400) - score = accuracy_score(y_test, classifier.predict(x_test)) - - self.assertGreater(score, 0.65, "Failed with score = {0}".format(score)) - - class FeatureEngineeringFunctionTest(tf.test.TestCase): """Tests feature_engineering_fn.""" @@ -145,7 +116,8 @@ class CustomOptimizer(tf.test.TestCase): optimizer=custom_optimizer, config=learn.RunConfig(tf_random_seed=1)) classifier.fit(x_train, y_train, steps=400) - score = accuracy_score(y_test, classifier.predict(x_test)) + predictions = np.array(list(classifier.predict(x_test))) + score = accuracy_score(y_test, predictions) self.assertGreater(score, 0.65, "Failed with score = {0}".format(score)) diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py new file mode 100644 index 00000000000..4b19f84a7ae --- /dev/null +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -0,0 +1,850 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Abstractions for the head(s) of a model. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +from tensorflow.contrib import losses +from tensorflow.contrib import metrics as metrics_lib +from tensorflow.contrib.learn.python.learn import metric_spec +from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.session_bundle import exporter +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import variables +from tensorflow.python.training import training + + +# TODO(zakaria): add functions that creates a head and returns ModelOpFn + + +def _regression_head(label_name=None, + weight_column_name=None, + target_dimension=1, + enable_centered_bias=False, head_name=None): + """Creates a _Head for linear regression. + + Args: + label_name: String, name of the key in label dict. 
Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + target_dimension: dimension of the target for multilabels. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + + Returns: + An instance of _Head + """ + return _RegressionHead(train_loss_fn=_mean_squared_loss, + eval_loss_fn=_mean_squared_loss, + label_name=label_name, + weight_column_name=weight_column_name, + target_dimension=target_dimension, + enable_centered_bias=enable_centered_bias, + head_name=head_name) + +# TODO(zakaria): Add logistic_regression_head + + +def _multi_class_head(n_classes, label_name=None, weight_column_name=None, + enable_centered_bias=False, head_name=None, + thresholds=None): + """Creates a _Head for multi class single label classification. + + The Head uses softmax cross entropy loss, or sigmoid cross entropy (log loss) when n_classes is 2. + + Args: + n_classes: Integer, number of classes, must be >= 2 + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval metrics, defaults to [.5] + + Returns: + An instance of _MultiClassHead. + + Raises: + ValueError: if n_classes is < 2 + """ + if n_classes < 2: + raise ValueError("n_classes must be > 1 for classification.") + if n_classes == 2: + loss_fn = _log_loss_with_two_classes + else: + loss_fn = _softmax_cross_entropy_loss + return _MultiClassHead(train_loss_fn=loss_fn, + eval_loss_fn=loss_fn, + n_classes=n_classes, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + +def _binary_svm_head(label_name=None, weight_column_name=None, + enable_centered_bias=False, head_name=None, + thresholds=None,): + """Creates a _Head for binary classification with SVMs. + + The head uses binary hinge loss. + + Args: + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval metrics, defaults to [.5] + + Returns: + An instance of _Head.
+ + """ + return _BinarySvmHead(label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + +def _multi_label_head(n_classes, label_name=None, weight_column_name=None, + enable_centered_bias=False, head_name=None, + thresholds=None): + """Creates a _Head for multi label classification. + + The Head uses softmax cross entropy loss. + + Args: + n_classes: Integer, number of classes, must be >= 2 + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval metrics, defaults to [.5] + + Returns: + An instance of _MultiClassHead. + + Raises: + ValueError: if n_classes is < 2 + """ + if n_classes < 2: + raise ValueError("n_classes must be > 1 for classification.") + return _MultiLabelHead(n_classes=n_classes, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + +# TODO(zakaria): Make the classes public once we are ready for users to subclass +# them. +class _Head(object): + """Interface for the head/top of a model. + + Given logits or output of a hidden layer, a Head knows how to compute + predictions, loss, default metric and export signature. + """ + __metaclass__ = abc.ABCMeta + + @abc.abstractproperty + def logits_dimension(self): + raise NotImplementedError("Calling an abstract method.") + + def head_ops(self, features, target, mode, train_op_fn, logits=None, + logits_input=None): + """Returns ops for a model_fn. + + Args: + features: input dict. + target: target dict or tensor. + mode: estimator's ModeKeys + train_op_fn: function that takes a scalar loss and returns an op to + optimize with the loss. + logits: logits to be used for the head. + logits_input: tensor to build logits from. + + Returns: + `estimator.ModelFnOps` + + Raises: + ValueError: if mode is not recognized. 
+ """ + _check_logits_input_not_supported(logits, logits_input) + if mode == estimator.ModeKeys.TRAIN: + loss, additional_train_op = self._training_loss(features, target, + logits, logits_input) + + train_op = train_op_fn(loss) if train_op_fn else None + + if additional_train_op: + if train_op: + train_op = control_flow_ops.group(train_op, *additional_train_op) + else: + train_op = control_flow_ops.group(*additional_train_op) + + return estimator.ModelFnOps(None, loss, train_op, + self._default_metric(), + self._create_signature_fn()) + if mode == estimator.ModeKeys.INFER: + predictions = self._infer_op(logits, logits_input) + return estimator.ModelFnOps(predictions, None, None, + self._default_metric(), + self._create_signature_fn()) + if mode == estimator.ModeKeys.EVAL: + predictions, loss = self._eval_op(features, target, logits, logits_input) + return estimator.ModelFnOps(predictions, loss, None, + self._default_metric(), + self._create_signature_fn()) + raise ValueError("mode=%s unrecognized" % str(mode)) + + @abc.abstractmethod + def _training_loss(self, features, target, logits=None, logits_input=None, + name="training_loss"): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _infer_op(self, logits=None, logits_input=None, name="infer_op"): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _eval_op(self, features, target, logits=None, logits_input=None, + name="eval_op"): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _default_metric(self): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _create_signature_fn(self): + """Creates signature function for the Head. + """ + raise NotImplementedError("Calling an abstract method.") + + +class _RegressionHead(_Head): + """_Head for regression.""" + + def __init__(self, train_loss_fn, eval_loss_fn, label_name, + weight_column_name, target_dimension, enable_centered_bias, + head_name): + """Base type for all single heads. + + Args: + train_loss_fn: loss_fn for training. + eval_loss_fn: loss_fn for eval. + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + target_dimension: Integer, number of label columns. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + """ + self._train_loss_fn = train_loss_fn + self._eval_loss_fn = eval_loss_fn + self._logits_dimension = target_dimension + self._label_name = label_name + self._weight_column_name = weight_column_name + self._head_name = head_name + self._enable_centered_bias = enable_centered_bias + self._centered_bias_weight_collection = _head_prefixed(head_name, + "centered_bias") + + @property + def logits_dimension(self): + return self._logits_dimension + + def _training_loss(self, features, target, logits=None, + logits_input=None, name="training_loss"): + """Returns training loss tensor for this head. 
+ + Training loss is different from the loss reported on the tensorboard as we + should respect the example weights when computing the gradient. + + L = sum_{i} w_{i} * l_{i} / B + + where B is the number of examples in the batch, l_{i}, w_{i} are individual + losses, and example weight. + + Args: + features: features dict. + target: either a tensor for labels or in multihead case, a dict of string + to target tensor. + logits: logits, a float tensor. + logits_input: Output of last hidden layer. + name: Op name. + + Returns: + A tuple of training Loss and additional_train_op (possibly None) + """ + target = _check_target(target, self._label_name) + + centered_bias_step = None + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + centered_bias_step = [_centered_bias_step( + self.logits_dimension, + self._centered_bias_weight_collection, + target, + self._train_loss_fn)] + + loss_unweighted = self._train_loss_fn(logits, target) + loss, weighted_average_loss = _loss( + loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + logging_ops.scalar_summary(_head_prefixed(self._head_name, "loss"), + weighted_average_loss) + return loss, centered_bias_step + + def _eval_op(self, features, target, logits=None, logits_input=None, + name="eval_op"): + target = _check_target(target, self._label_name) + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + loss_unweighted = self._eval_loss_fn(logits, target) + loss, _ = _loss(loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + + predictions = self._logits_to_prediction(logits) + + return predictions, loss + + def _infer_op(self, logits=None, logits_input=None): + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + return self._logits_to_prediction(logits) + + def _logits_to_prediction(self, logits=None): + predictions = {} + if self.logits_dimension == 1: + predictions[PedictionKey.SCORES] = array_ops.squeeze( + logits, squeeze_dims=[1]) + else: + predictions[PedictionKey.SCORES] = logits + return predictions + + # pylint: disable=undefined-variable + def _create_signature_fn(self): + def _regression_signature_fn(examples, unused_features, predictions): + if isinstance(predictions, dict): + score = predictions[PedictionKey.SCORES] + else: + score = predictions + + default_signature = exporter.regression_signature( + input_tensor=examples, output_tensor=score) + # TODO(zakaria): add validation + return default_signature, {} + return _regression_signature_fn + + def _default_metric(self): + return {_head_prefixed(self._head_name, MetricKey.LOSS): + _weighted_average_loss_metric_spec(self._eval_loss_fn, + PedictionKey.SCORES, + self._label_name, + self._weight_column_name)} + + +class _MultiClassHead(_Head): + """_Head for classification.""" + + def __init__(self, train_loss_fn, eval_loss_fn, n_classes, label_name, + weight_column_name, enable_centered_bias, head_name, + thresholds=None): + """Base type for all single heads. + + Args: + train_loss_fn: loss_fn for training. + eval_loss_fn: loss_fn for eval. + n_classes: number of classes. + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). 
+ weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval. + + Raises: + ValueError: if n_classes is invalid. + """ + if n_classes < 2: + raise ValueError("n_classes must be >= 2") + self._thresholds = thresholds if thresholds else [.5] + + self._train_loss_fn = train_loss_fn + self._eval_loss_fn = eval_loss_fn + self._logits_dimension = 1 if n_classes == 2 else n_classes + self._label_name = label_name + self._weight_column_name = weight_column_name + self._head_name = head_name + self._enable_centered_bias = enable_centered_bias + self._centered_bias_weight_collection = _head_prefixed(head_name, + "centered_bias") + + @property + def logits_dimension(self): + return self._logits_dimension + + def _training_loss(self, features, target, logits=None, + logits_input=None, name="training_loss"): + """Returns training loss tensor for this head. + + Training loss is different from the loss reported on the tensorboard as we + should respect the example weights when computing the gradient. + + L = sum_{i} w_{i} * l_{i} / B + + where B is the number of examples in the batch, l_{i}, w_{i} are individual + losses, and example weight. + + Args: + features: features dict. + target: either a tensor for labels or in multihead case, a dict of string + to target tensor. + logits: logits, a float tensor. + logits_input: Output of last hidden layer. + name: Op name. 
+ + Returns: + A tuple of training Loss and additional_train_op (possibly None) + """ + target = _check_target(target, self._label_name) + + centered_bias_step = None + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + centered_bias_step = [_centered_bias_step( + self.logits_dimension, + self._centered_bias_weight_collection, + target, + self._train_loss_fn)] + + loss_unweighted = self._train_loss_fn(logits, target) + loss, weighted_average_loss = _loss( + loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + logging_ops.scalar_summary(_head_prefixed(self._head_name, "loss"), + weighted_average_loss) + return loss, centered_bias_step + + def _eval_op(self, features, target, logits=None, logits_input=None, + name="eval_op"): + target = _check_target(target, self._label_name) + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + loss_unweighted = self._eval_loss_fn(logits, target) + loss, _ = _loss(loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + + predictions = self._logits_to_prediction(logits) + + return predictions, loss + + def _infer_op(self, logits=None, logits_input=None): + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + return self._logits_to_prediction(logits) + + def _logits_to_prediction(self, logits=None): + predictions = {PedictionKey.LOGITS: logits} + if self.logits_dimension == 1: + predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits) + logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) + predictions[PedictionKey.PROBABILITIES] = nn.softmax(logits) + # Workaround for argmax dropping the second demension. + predictions[PedictionKey.CLASSES] = array_ops.expand_dims( + math_ops.argmax(logits, 1), 1) + return predictions + + def _create_signature_fn(self): + """See superclass.""" + def _classification_signature_fn(examples, unused_features, predictions): + """Servo signature function.""" + if isinstance(predictions, dict): + default_signature = exporter.classification_signature( + input_tensor=examples, + classes_tensor=predictions[PedictionKey.CLASSES], + scores_tensor=predictions[PedictionKey.PROBABILITIES]) + else: + default_signature = exporter.classification_signature( + input_tensor=examples, + scores_tensor=predictions) + + # TODO(zakaria): add validation + return default_signature, {} + return _classification_signature_fn + + def _default_metric(self): + metrics = {_head_prefixed(self._head_name, MetricKey.LOSS): + _weighted_average_loss_metric_spec(self._eval_loss_fn, + PedictionKey.LOGITS, + self._label_name, + self._weight_column_name)} + + # TODO(b/29366811): This currently results in both an "accuracy" and an + # "accuracy/threshold_0.500000_mean" metric for binary classification. 
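The zero-padding in _logits_to_prediction works because appending a zero logit and applying softmax reproduces the sigmoid probability, and argmax over the padded logits is just thresholding at 0. A quick numeric check (illustrative only, not part of the patch):

import numpy as np

x = 1.3  # a single binary-classification logit
sigmoid = 1.0 / (1.0 + np.exp(-x))
padded = np.array([0.0, x])  # what concat(1, [zeros_like(logits), logits]) builds
softmax = np.exp(padded) / np.sum(np.exp(padded))
assert np.isclose(softmax[1], sigmoid)         # P(class 1) equals the sigmoid
assert int(np.argmax(padded)) == int(x > 0.0)  # class prediction = logit > 0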
+ metrics[_head_prefixed(self._head_name, MetricKey.ACCURACY)] = ( + metric_spec.MetricSpec(metrics_lib.streaming_accuracy, + PedictionKey.CLASSES, self._label_name, + self._weight_column_name)) + if self.logits_dimension == 1: + def _add_binary_metric(metric_key, metric_fn): + metrics[_head_prefixed(self._head_name, metric_key)] = ( + metric_spec.MetricSpec(metric_fn, + PedictionKey.LOGISTIC, + self._label_name)) + _add_binary_metric(MetricKey.PREDICTION_MEAN, _predictions_streaming_mean) + _add_binary_metric(MetricKey.TARGET_MEAN, _target_streaming_mean) + + # Also include the streaming mean of the label as an accuracy baseline, as + # a reminder to users. + _add_binary_metric(MetricKey.ACCURACY_BASELINE, _target_streaming_mean) + + _add_binary_metric(MetricKey.AUC, _streaming_auc) + + for threshold in self._thresholds: + _add_binary_metric(MetricKey.ACCURACY_MEAN % threshold, + _accuracy_at_threshold(threshold)) + # Precision for positive examples. + _add_binary_metric(MetricKey.PRECISION_MEAN % threshold, + _streaming_at_threshold( + metrics_lib.streaming_precision_at_thresholds, + threshold),) + # Recall for positive examples. + _add_binary_metric(MetricKey.RECALL_MEAN % threshold, + _streaming_at_threshold( + metrics_lib.streaming_recall_at_thresholds, + threshold)) + return metrics + + +def _check_target(target, label_name): + target = target[label_name] if isinstance(target, dict) else target + if isinstance(target, ops.SparseTensor): + raise ValueError("SparseTensor is not supported as a target/label.") + return target + + +class _BinarySvmHead(_MultiClassHead): + """_Head for binary classification using SVMs.""" + + def __init__(self, label_name, weight_column_name, enable_centered_bias, + head_name, thresholds): + def loss_fn(logits, target): + check_shape_op = control_flow_ops.Assert( + math_ops.less_equal(array_ops.rank(target), 2), + ["target's shape should be either [batch_size, 1] or [batch_size]"]) + with ops.control_dependencies([check_shape_op]): + target = array_ops.reshape( + target, shape=[array_ops.shape(target)[0], 1]) + return losses.hinge_loss(logits, target) + + super(_BinarySvmHead, self).__init__( + train_loss_fn=loss_fn, + eval_loss_fn=loss_fn, + n_classes=2, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + def _logits_to_prediction(self, logits=None): + predictions = {} + # Workaround for argmax dropping the second demension. + predictions[PedictionKey.LOGITS] = array_ops.expand_dims( + math_ops.argmax(logits, 1), 1) + logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) + predictions[PedictionKey.CLASSES] = array_ops.expand_dims( + math_ops.argmax(logits, 1), 1) + + return predictions + + +class _MultiLabelHead(_MultiClassHead): + """_Head for multlabel classification.""" + + # TODO(zakaria): add signature and metric for multilabel. 
+ def __init__(self, n_classes, label_name, + weight_column_name, enable_centered_bias, head_name, + thresholds): + + super(_MultiLabelHead, self).__init__( + train_loss_fn=_sigmoid_cross_entropy_loss, + eval_loss_fn=_sigmoid_cross_entropy_loss, + n_classes=n_classes, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + def _logits_to_prediction(self, logits=None): + predictions = {PedictionKey.LOGITS: logits} + if self.logits_dimension == 1: + predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits) + logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) + predictions[PedictionKey.PROBABILITIES] = math_ops.sigmoid(logits) + # Workaround for argmax dropping the second demension. + predictions[PedictionKey.CLASSES] = math_ops.to_int64( + math_ops.greater(logits, 0)) + return predictions + + +def _weighted_loss(loss, weight): + """Returns cumulative weighted loss.""" + unweighted_loss = array_ops.reshape(loss, shape=(-1,)) + weighted_loss = math_ops.mul(unweighted_loss, + array_ops.reshape( + weight, shape=(-1,))) + return weighted_loss + + +def _weight_tensor(features, weight_column_name): + if not weight_column_name: + return None + else: + return array_ops.reshape( + math_ops.to_float(features[weight_column_name]), + shape=(-1,)) + + +def _loss(loss_unweighted, weight, name): + """Returns loss.""" + if weight is None: + loss = math_ops.reduce_mean(loss_unweighted, name=name) + return loss, loss + else: + loss_weighted = _weighted_loss(loss_unweighted, weight) + weighted_average_loss = math_ops.div( + math_ops.reduce_sum(loss_weighted), + math_ops.to_float(math_ops.reduce_sum(weight)), + name="weighted_average_loss") + loss = math_ops.reduce_mean(loss_weighted, name=name) + return loss, weighted_average_loss + + +def _check_logits_input_not_supported(logits, logits_input): + if logits_input is not None or logits is None: + raise NotImplementedError("logits_input is not supported yet, " + "must pass logits") + + +def _centered_bias(logits_dimension, weight_collection): + """Creates and returns centered bias.""" + centered_bias = variables.Variable( + array_ops.zeros([logits_dimension]), + collections=[weight_collection, ops.GraphKeys.VARIABLES], + name="centered_bias_weight") + logging_ops.scalar_summary( + ["centered_bias_%d" % cb for cb in range(logits_dimension)], + array_ops.reshape(centered_bias, [-1])) + return centered_bias + + +def _centered_bias_step(logits_dimension, weight_collection, target, + train_loss_fn): + """Creates and returns training op for centered bias.""" + centered_bias = ops.get_collection(weight_collection) + batch_size = array_ops.shape(target)[0] + logits = array_ops.reshape( + array_ops.tile(centered_bias[0], [batch_size]), + [batch_size, logits_dimension]) + with ops.name_scope(None, "centered_bias", (target, logits)): + centered_bias_loss = math_ops.reduce_mean( + train_loss_fn(logits, target), name="training_loss") + # Learn central bias by an optimizer. 0.1 is a convervative lr for a + # single variable. + return training.AdagradOptimizer(0.1).minimize( + centered_bias_loss, var_list=centered_bias) + + +def _head_prefixed(head_name, val): + return "%s_%s" % (head_name, val) if head_name else val + + +# TODO(zakaria): use contrib losses. +def _mean_squared_loss(logits, target): + # To prevent broadcasting inside "-". 
+ if len(target.get_shape()) == 1: + target = array_ops.expand_dims(target, dim=[1]) + # TODO(zakaria): make sure it does not recreate the broadcast bug. + if len(logits.get_shape()) == 1: + logits = array_ops.expand_dims(logits, dim=[1]) + logits.get_shape().assert_is_compatible_with(target.get_shape()) + return math_ops.square(logits - math_ops.to_float(target)) + + +def _log_loss_with_two_classes(logits, target): + # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target. + if len(target.get_shape()) == 1: + target = array_ops.expand_dims(target, dim=[1]) + loss_vec = nn.sigmoid_cross_entropy_with_logits(logits, + math_ops.to_float(target)) + return loss_vec + + +def _softmax_cross_entropy_loss(logits, target): + # Check that we got int32/int64 for classification. + if (not target.dtype.is_compatible_with(dtypes.int64) and + not target.dtype.is_compatible_with(dtypes.int32)): + raise ValueError("Target's dtype should be int32, int64 or compatible. " + "Instead got %s." % target.dtype) + # sparse_softmax_cross_entropy_with_logits requires [batch_size] target. + if len(target.get_shape()) == 2: + target = array_ops.squeeze(target, squeeze_dims=[1]) + loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target) + return loss_vec + + +def _sigmoid_cross_entropy_loss(logits, target): + # sigmoid_cross_entropy_with_logits requires [batch_size, n_classes] target. + return nn.sigmoid_cross_entropy_with_logits(logits, math_ops.to_float(target)) + + +def _float_weights_or_none(weights): + if weights is None: + return None + return math_ops.to_float(weights) + + +def _weighted_average_loss_metric_spec(loss_fn, predictoin_key, + label_key, weight_key): + def _streaming_weighted_average_loss(predictions, target, weights=None): + loss_unweighted = loss_fn(predictions, target) + _, weighted_average_loss = _loss(loss_unweighted, + weights, + name="eval_loss") + return metrics_lib.streaming_mean(weighted_average_loss) + return metric_spec.MetricSpec(_streaming_weighted_average_loss, + predictoin_key, label_key, weight_key) + + +def _target_streaming_mean(unused_predictions, target, weights=None): + return metrics_lib.streaming_mean(target, weights=weights) + + +def _predictions_streaming_mean(predictions, unused_target, weights=None): + return metrics_lib.streaming_mean(predictions, weights=weights) + + +def _streaming_auc(predictions, target, weights=None): + return metrics_lib.streaming_auc(predictions, target, + weights=_float_weights_or_none(weights)) + + +def _accuracy_at_threshold(threshold): + + def _accuracy_metric(predictions, target, weights=None): + threshold_predictions = math_ops.to_float( + math_ops.greater_equal(predictions, threshold)) + return metrics_lib.streaming_accuracy(predictions=threshold_predictions, + labels=target, + weights=weights) + + return _accuracy_metric + + +def _streaming_at_threshold(streaming_metrics_fn, threshold): + + def _streaming_metrics(predictions, target, weights=None): + precision_tensor, update_op = streaming_metrics_fn( + predictions, labels=target, thresholds=[threshold], + weights=_float_weights_or_none(weights)) + return array_ops.squeeze(precision_tensor), update_op + + return _streaming_metrics + + +# PedictionKey.CLASSES +class PedictionKey(object): + CLASSES = "classes" + PROBABILITIES = "probabilities" + LOGITS = "logits" + LOGISTIC = "logistic" + SCORES = "scores" + + +class MetricKey(object): + LOSS = "loss" + AUC = "auc" + PREDICTION_MEAN = "labels/prediction_mean" + TARGET_MEAN = "labels/actual_target_mean" + ACCURACY = 
"accuracy" + ACCURACY_BASELINE = "accuracy/baseline_target_mean" + ACCURACY_MEAN = "accuracy/threshold_%f_mean" + PRECISION_MEAN = "precision/positive_threshold_%f_mean" + RECALL_MEAN = "recall/positive_threshold_%f_mean" diff --git a/tensorflow/contrib/learn/python/learn/estimators/head_test.py b/tensorflow/contrib/learn/python/learn/estimators/head_test.py new file mode 100644 index 00000000000..dcb292905c7 --- /dev/null +++ b/tensorflow/contrib/learn/python/learn/estimators/head_test.py @@ -0,0 +1,174 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for head.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib + + +class RegressionModelHeadTest(tf.test.TestCase): + + # TODO(zakaria): test multilabel regresssion. + def testRegression(self): + head = head_lib._regression_head() + with tf.Graph().as_default(), tf.Session() as sess: + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.constant([[0.], [1.], [1.]]) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=prediction) + self.assertAlmostEqual(5. / 3, sess.run(model_fn_ops.loss)) + + def testRegressionWithWeights(self): + head = head_lib._regression_head( + weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session() as sess: + features = {"label_weight": tf.constant([[2.], [5.], [0.]])} + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.constant([[0.], [1.], [1.]]) + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=prediction) + self.assertAlmostEqual(2. 
/ 3, sess.run(model_fn_ops.loss), places=3) + + def testErrorInSparseTensorTarget(self): + head = head_lib._regression_head() + with tf.Graph().as_default(): + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.SparseTensor( + indices=tf.constant([[0, 0], [1, 0], [2, 0]], dtype=tf.int64), + values=tf.constant([0., 1., 1.]), + shape=[3, 1]) + with self.assertRaisesRegexp( + ValueError, "SparseTensor is not supported as a target"): + head.head_ops({}, targets, tf.contrib.learn.ModeKeys.TRAIN, None, + logits=prediction) + + +class MultiClassModelHeadTest(tf.test.TestCase): + + def testBinaryClassification(self): + head = head_lib._multi_class_head(n_classes=2) + with tf.Graph().as_default(), tf.Session() as sess: + logits = tf.constant([[1.], [1.]]) + targets = tf.constant([[1.], [0.]]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(.81326163, sess.run(model_fn_ops.loss)) + + def testErrorInSparseTensorTarget(self): + head = head_lib._multi_class_head(n_classes=2) + with tf.Graph().as_default(): + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.SparseTensor( + indices=tf.constant([[0, 0], [1, 0], [2, 0]], dtype=tf.int64), + values=tf.constant([0, 1, 1]), + shape=[3, 1]) + with self.assertRaisesRegexp( + ValueError, "SparseTensor is not supported as a target"): + head.head_ops({}, targets, tf.contrib.learn.ModeKeys.TRAIN, None, + logits=prediction) + + def testBinaryClassificationWithWeights(self): + head = head_lib._multi_class_head( + n_classes=2, weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session() as sess: + features = {"label_weight": tf.constant([[1.], [0.]])} + logits = tf.constant([[1.], [1.]]) + targets = tf.constant([[1.], [0.]]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(.31326166 / 2, sess.run(model_fn_ops.loss)) + + def testMultiClass(self): + head = head_lib._multi_class_head(n_classes=3) + with tf.Graph().as_default(), tf.Session() as sess: + logits = tf.constant([[1., 0., 0.]]) + targets = tf.constant([2]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(1.5514446, sess.run(model_fn_ops.loss)) + + def testMultiClassWithWeight(self): + head = head_lib._multi_class_head( + n_classes=3, weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session() as sess: + features = {"label_weight": tf.constant([0.1])} + logits = tf.constant([[1., 0., 0.]]) + targets = tf.constant([2]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(.15514446, sess.run(model_fn_ops.loss)) + + def testMultiClassWithInvalidNClass(self): + try: + head_lib._multi_class_head(n_classes=1) + self.fail("Softmax with no n_classes did not raise error.") + except ValueError: + # Expected + pass + + +class BinarySvmModelHeadTest(tf.test.TestCase): + + def testBinarySVMDefaultWeights(self): + head = head_lib._binary_svm_head() + predictions = tf.constant([[-0.5], [1.2]]) + targets = 
tf.constant([0, 1]) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=predictions) + # Prediction for first example is in the right side of the hyperplane (i.e., + # < 0) but it is within the [-1,1] margin. There is a 0.5 loss incurred by + # this example. The 2nd prediction is outside the margin so it incurs no + # loss at all. The overall (normalized) loss is therefore 0.5/(1+1) = 0.25. + with tf.Session() as sess: + self.assertAlmostEqual(0.25, sess.run(model_fn_ops.loss)) + + def testBinarySVMWithWeights(self): + head = head_lib._binary_svm_head( + weight_column_name="weights") + predictions = tf.constant([[-0.7], [0.2]]) + targets = tf.constant([0, 1]) + features = {"weights": tf.constant([2.0, 10.0])} + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=predictions) + # Prediction for both examples are in the right side of the hyperplane but + # within the margin. The (weighted) loss incurred is 2*0.3=0.6 and 10*0.8=8 + # respectively. The overall (normalized) loss is therefore 8.6/12. + with tf.Session() as sess: + self.assertAlmostEqual(8.6 / 2, sess.run(model_fn_ops.loss), places=3) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index e7ed35712a9..f4d1fb977af 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -34,7 +34,6 @@ from tensorflow.contrib.framework.python.ops import variables as contrib_variabl from tensorflow.contrib.layers.python.layers import target_column from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import metric_spec -from tensorflow.contrib.learn.python.learn import session_run_hook from tensorflow.contrib.learn.python.learn import trainable from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined from tensorflow.contrib.learn.python.learn.estimators import estimator @@ -53,12 +52,18 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import session_run_hook from tensorflow.python.training import training as train _CLASSES = "classes" _LOGISTIC = "logistic" _PROBABILITIES = "probabilities" +# The default learning rate of 0.2 is a historical artifact of the initial +# implementation, but seems a reasonable choice. +_LEARNING_RATE = 0.2 + def _get_metric_args(metric): if hasattr(metric, "__code__"): @@ -86,7 +91,7 @@ def _wrap_metric(metric): def _get_optimizer(spec): if isinstance(spec, six.string_types): return layers.OPTIMIZER_CLS_NAMES[spec]( - learning_rate=0.2) + learning_rate=_LEARNING_RATE) elif callable(spec): return spec() return spec @@ -171,10 +176,45 @@ def _weighted_loss(loss, weight_tensor): def _linear_classifier_model_fn(features, targets, mode, params): - """Estimator's linear model_fn.""" + """Linear classifier model_fn. + + Args: + features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). + targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of + dtype `int32` or `int64` in the range `[0, n_classes)`. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. 
+ params: A dict of hyperparameters. + The following hyperparameters are expected: + * feature_columns: An iterable containing all the feature columns used by + the model. + * n_classes: number of target classes. + * weight_column_name: A string defining the weight feature column, or + None if there are no weights. + * optimizer: string, `Optimizer` object, or callable that defines the + optimizer to use for training. + * gradient_clip_norm: A float > 0. If provided, gradients are + clipped to their global norm with this clipping ratio. + * enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + * num_ps_replicas: The number of parameter server replicas. + * joint_weights: If True, the weights for all columns will be stored in a + single (possibly partitioned) variable. It's more efficient, but it's + incompatible with SDCAOptimizer, and requires all feature columns are + sparse and use the 'sum' combiner. + + Returns: + predictions: A dict of `Tensor` objects. + loss: A scalar containing the loss of the step. + train_op: The op for training. + + Raises: + ValueError: If mode is not any of the `ModeKeys`. + """ + feature_columns = params["feature_columns"] n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] - feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) @@ -184,25 +224,24 @@ def _linear_classifier_model_fn(features, targets, mode, params): if not isinstance(features, dict): features = {"": features} + parent_scope = "linear" num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes - feat_values = (features.values() if isinstance(features, dict) - else [features]) partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope( - feat_values, "linear", partitioner=partitioner) as scope: + features.values(), parent_scope, partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, - weight_collections=["linear"], + weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = ( @@ -210,7 +249,7 @@ def _linear_classifier_model_fn(features, targets, mode, params): columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, - weight_collections=["linear"], + weight_collections=[parent_scope], scope=scope)) if enable_centered_bias: @@ -252,11 +291,39 @@ def _linear_classifier_model_fn(features, targets, mode, params): def sdca_classifier_model_fn(features, targets, mode, params): - """Estimator's linear model_fn.""" + """Linear classifier model_fn that uses the SDCA optimizer. + + Args: + features: A dict of `Tensor` keyed by column name. + targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of + dtype `int32` or `int64` in the range `[0, n_classes)`. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + params: A dict of hyperparameters. 
+ The following hyperparameters are expected: + * feature_columns: An iterable containing all the feature columns used by + the model. + * optimizer: An `SDCAOptimizer` instance. + * weight_column_name: A string defining the weight feature column, or + None if there are no weights. + * loss_type: A string. Must be either "logistic_loss" or "hinge_loss". + * update_weights_hook: A `SessionRunHook` object or None. Used to update + model weights. + + Returns: + predictions: A dict of `Tensor` objects. + loss: A scalar containing the loss of the step. + train_op: The op for training. + + Raises: + ValueError: If `optimizer` is not an `SDCAOptimizer` instance. + ValueError: If mode is not any of the `ModeKeys`. + """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] + update_weights_hook = params.get("update_weights_hook") if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") @@ -283,9 +350,12 @@ def sdca_classifier_model_fn(features, targets, mode, params): train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() - train_op = optimizer.get_train_step( - columns_to_variables, weight_column_name, loss_type, features, - targets, global_step) + sdca_model, train_op = optimizer.get_train_step(columns_to_variables, + weight_column_name, + loss_type, features, + targets, global_step) + if update_weights_hook is not None: + update_weights_hook.set_parameters(sdca_model, train_op) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) @@ -298,10 +368,32 @@ def sdca_classifier_model_fn(features, targets, mode, params): # Ensures consistency with LinearComposableModel. def _get_default_optimizer(feature_columns): - learning_rate = min(0.2, 1.0 / math.sqrt(len(feature_columns))) + learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns))) return train.FtrlOptimizer(learning_rate=learning_rate) +class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): + """SessionRunHook to update and shrink SDCA model weights.""" + + def __init__(self): + pass + + def set_parameters(self, sdca_model, train_op): + self._sdca_model = sdca_model + self._train_op = train_op + + def begin(self): + """Construct the update_weights op. + + The op is implicitly added to the default graph. + """ + self._update_op = self._sdca_model.update_weights(self._train_op) + + def before_run(self, run_context): + """Return the update_weights op so that it is executed during this run.""" + return session_run_hook.SessionRunArgs(self._update_op) + + class LinearClassifier(evaluable.Evaluable, trainable.Trainable): """Linear classifier model. @@ -431,15 +523,23 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable): self._optimizer = _get_optimizer(optimizer) num_ps_replicas = config.num_ps_replicas if config else 0 + chief_hook = None if isinstance(optimizer, sdca_optimizer.SDCAOptimizer): assert not _joint_weight, ("_joint_weight is incompatible with the" " SDCAOptimizer") model_fn = sdca_classifier_model_fn + # We use a hook to perform the weight update and shrink step only on the + # chief. Because the SdcaModel constructed by the estimator within the + # call to fit() but we need to pass the hook to fit(), we pass the hook + # as a parameter to the model_fn and have that propagate the model to the + # hook. 
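For orientation, a hypothetical training call that exercises this path: with an SDCAOptimizer the classifier wires the weight-update hook itself on the chief, so callers simply fit as usual. The feature column, example ids, values and step count below are made up, and the SDCAOptimizer is assumed to be the one exposed under tf.contrib.linear_optimizer.

import tensorflow as tf

def my_input_fn():
  # SDCA requires an example_id column alongside the features.
  features = {
      'example_id': tf.constant(['1', '2', '3']),
      'age': tf.constant([[20.0], [35.0], [50.0]]),
  }
  labels = tf.constant([[0], [1], [1]])
  return features, labels

age = tf.contrib.layers.real_valued_column('age')
optimizer = tf.contrib.linear_optimizer.SDCAOptimizer(
    example_id_column='example_id')
classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=[age], optimizer=optimizer)
classifier.fit(input_fn=my_input_fn, steps=100)  # update hook appended on the chief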
+ chief_hook = _SdcaUpdateWeightsHook() params = { "feature_columns": feature_columns, "optimizer": self._optimizer, "weight_column_name": weight_column_name, "loss_type": "logistic_loss", + "update_weights_hook": chief_hook, } else: model_fn = _linear_classifier_model_fn @@ -461,6 +561,10 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable): params=params, feature_engineering_fn=feature_engineering_fn) + self._additional_run_hook = None + if self._estimator.config.is_chief: + self._additional_run_hook = chief_hook + def get_estimator(self): return self._estimator @@ -468,22 +572,24 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable): monitors=None, max_steps=None): """See trainable.Trainable.""" # TODO(roumposg): Remove when deprecated monitors are removed. - if monitors is not None: - deprecated_monitors = [ - m for m in monitors - if not isinstance(m, session_run_hook.SessionRunHook) - ] - for monitor in deprecated_monitors: - monitor.set_estimator(self) - monitor._lock_estimator() # pylint: disable=protected-access + if monitors is None: + monitors = [] + deprecated_monitors = [ + m for m in monitors + if not isinstance(m, session_run_hook.SessionRunHook) + ] + for monitor in deprecated_monitors: + monitor.set_estimator(self) + monitor._lock_estimator() # pylint: disable=protected-access + if self._additional_run_hook: + monitors.append(self._additional_run_hook) result = self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, batch_size=batch_size, monitors=monitors, max_steps=max_steps) - if monitors is not None: - for monitor in deprecated_monitors: - monitor._unlock_estimator() # pylint: disable=protected-access + for monitor in deprecated_monitors: + monitor._unlock_estimator() # pylint: disable=protected-access return result @@ -712,6 +818,12 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor): if enable_centered_bias is None: enable_centered_bias = True dnn_linear_combined._changing_default_center_bias() # pylint: disable=protected-access + + if isinstance(optimizer, sdca_optimizer.SDCAOptimizer): + enable_centered_bias = False + logging.warning("centered_bias is not supported with SDCA, " + "please disable it explicitly.") + self._weight_column_name = weight_column_name self._joint_weights = _joint_weights super(LinearRegressor, self).__init__( model_dir=model_dir, @@ -737,20 +849,22 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor): layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._linear_feature_columns, - num_outputs=self._target_column.num_label_columns, + num_outputs=self._head.logits_dimension, weight_collections=[self._linear_model.get_scope_name()], scope=self._linear_model.get_scope_name())) - with ops.control_dependencies([self._centered_bias()]): - loss = self._target_column.loss(logits, targets, features) - logging_ops.scalar_summary("loss", loss) + _add_bias_column(self._linear_feature_columns, features, bias, targets, + columns_to_variables) - _add_bias_column(self._linear_feature_columns, features, bias, targets, - columns_to_variables) + def _train_op_fn(unused_loss): + sdca_model, train_op = self._linear_optimizer.get_train_step( + columns_to_variables, self._weight_column_name, + self._loss_type(), features, targets, global_step) + return sdca_model.update_weights(train_op) - train_op = self._linear_optimizer.get_train_step( - columns_to_variables, self._target_column.weight_column_name, - self._loss_type(), features, targets, 
global_step) - return train_op, loss + model_fn_ops = self._head.head_ops(features, targets, + estimator.ModeKeys.TRAIN, _train_op_fn, + logits=logits) + return model_fn_ops.training_op, model_fn_ops.loss def _loss_type(self): return "squared_loss" diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py index 3156b86970e..d19ae78951b 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py @@ -257,10 +257,11 @@ class LinearClassifierTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]], dtype=tf.float32) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)} + features = {'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): @@ -272,9 +273,9 @@ class LinearClassifierTest(tf.test.TestCase): classifier = tf.contrib.learn.LinearClassifier( feature_columns=[tf.contrib.layers.real_valued_column('x')]) - classifier.fit(input_fn=_input_fn_train, steps=100) + classifier.fit(input_fn=_input_fn, steps=100) scores = classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'my_accuracy': MetricSpec( @@ -289,7 +290,8 @@ class LinearClassifierTest(tf.test.TestCase): self.assertTrue( set(['loss', 'my_accuracy', 'my_precision', 'my_metric' ]).issubset(set(scores.keys()))) - predictions = classifier.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(classifier.predict(input_fn=predict_input_fn))) self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions), scores['my_accuracy']) @@ -297,14 +299,14 @@ class LinearClassifierTest(tf.test.TestCase): # "probabilities". with self.assertRaises(ValueError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={('bad_name', 'bad_type'): tf.contrib.metrics.streaming_auc}) # Test the case where the tuple of the key doesn't have 2 elements. 
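A note on the test rewrite above and continued below: predict() on these estimators now yields results as an iterable, so the tests bound the input with tf.train.limit_epochs and materialize the generator with list(). A minimal standalone sketch of that pattern, reusing the same toy data and column name as the test; it is illustrative only and not part of the patch:

import functools
import numpy as np
import tensorflow as tf

def _input_fn(num_epochs=None):
  # Four rows of a constant feature 'x'. limit_epochs bounds how often the
  # data can be read, so an iterable predict() terminates.
  features = {'x': tf.train.limit_epochs(
      tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)}
  target = tf.constant([[1], [0], [0], [0]], dtype=tf.float32)
  return features, target

classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=[tf.contrib.layers.real_valued_column('x')])
classifier.fit(input_fn=_input_fn, steps=100)

# Training and evaluation may cycle through the data indefinitely
# (num_epochs=None); prediction uses a single epoch and materializes the
# generator before comparing against numpy arrays.
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(classifier.predict(input_fn=predict_input_fn)))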
with self.assertRaises(ValueError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ ('bad_length_name', 'classes', 'bad_length'): @@ -987,10 +989,11 @@ class LinearRegressorTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1.], [0.], [0.], [0.]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = {'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): @@ -1000,9 +1003,9 @@ class LinearRegressorTest(tf.test.TestCase): feature_columns=[tf.contrib.layers.real_valued_column('x')], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - regressor.fit(input_fn=_input_fn_train, steps=100) + regressor.fit(input_fn=_input_fn, steps=100) scores = regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={ 'my_error': tf.contrib.metrics.streaming_mean_squared_error, @@ -1011,15 +1014,16 @@ class LinearRegressorTest(tf.test.TestCase): self.assertIn('loss', set(scores.keys())) self.assertIn('my_error', set(scores.keys())) self.assertIn('my_metric', set(scores.keys())) - predictions = regressor.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(regressor.predict(input_fn=predict_input_fn))) self.assertAlmostEqual( _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), scores['my_error']) # Tests that when the key is a tuple, an error is raised. - with self.assertRaises(TypeError): + with self.assertRaises(KeyError): regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={('my_error', 'predictions' ): tf.contrib.metrics.streaming_mean_squared_error}) diff --git a/tensorflow/contrib/learn/python/learn/tests/multioutput_test.py b/tensorflow/contrib/learn/python/learn/estimators/multioutput_test.py similarity index 95% rename from tensorflow/contrib/learn/python/learn/tests/multioutput_test.py rename to tensorflow/contrib/learn/python/learn/estimators/multioutput_test.py index a51f7468905..4feb67e7faa 100644 --- a/tensorflow/contrib/learn/python/learn/tests/multioutput_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/multioutput_test.py @@ -39,7 +39,7 @@ class MultiOutputTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(x), target_dimension=2) regressor.fit(x, y, steps=100) - score = mean_squared_error(regressor.predict(x), y) + score = mean_squared_error(np.array(list(regressor.predict(x))), y) self.assertLess(score, 10, "Failed with score = {0}".format(score)) diff --git a/tensorflow/contrib/learn/python/learn/tests/nonlinear_test.py b/tensorflow/contrib/learn/python/learn/estimators/nonlinear_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/nonlinear_test.py rename to tensorflow/contrib/learn/python/learn/estimators/nonlinear_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py index 318891f0fd5..73810352394 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py +++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py @@ -114,7 +114,7 @@ class 
TensorForestEstimator(estimator.BaseEstimator): as_iterable=False) def predict_proba( self, x=None, input_fn=None, batch_size=None, outputs=None, - as_iterable=False): + as_iterable=True): """Returns prediction probabilities for given features (classification). Args: @@ -148,7 +148,7 @@ class TensorForestEstimator(estimator.BaseEstimator): as_iterable=False) def predict( self, x=None, input_fn=None, axis=None, batch_size=None, outputs=None, - as_iterable=False): + as_iterable=True): """Returns predictions for given features. Args: diff --git a/tensorflow/contrib/learn/python/learn/tests/regression_test.py b/tensorflow/contrib/learn/python/learn/estimators/regression_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/regression_test.py rename to tensorflow/contrib/learn/python/learn/estimators/regression_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index 5d6e67484f3..20cc7485753 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -130,6 +130,8 @@ class RunConfig(object): # If not explicitly specified in the constructor and the TF_CONFIG # environment variable is present, load cluster_spec from TF_CONFIG. config = json.loads(os.environ.get('TF_CONFIG') or '{}') + environment = config.get('environment', 'local') + if not cluster_spec and 'cluster' in config: cluster_spec = ClusterSpec(config['cluster']) self.cluster_spec = cluster_spec @@ -138,6 +140,7 @@ class RunConfig(object): # otherwise, if the TF_CONFIG environment variable is present, use that. # Otherwise, use the respective default (None / 0). task_env = config.get('task', {}) + self._job_name = job_name or task_env.get('type') or None self.task = task if task is not None else task_env.get('index') or 0 @@ -151,11 +154,13 @@ class RunConfig(object): self.num_ps_replicas = num_ps_replicas or _count_ps(self.cluster_spec) or 0 # Set is_chief. + # TODO(b/32117298): cleanup environment-specific logic for setting is_chief + # once the environments have been further unified. self._is_chief = is_chief if self._is_chief is None: if not self._job_name: self._is_chief = (self.task == 0) - elif config: + elif config and environment == 'cloud': # When the TF_CONFIG environment variable is set, we can set the # default of is_chief to 0 when job_name is "master" and task is 0. self._is_chief = (self._job_name == 'master' and self.task == 0) @@ -176,11 +181,19 @@ class RunConfig(object): 'job_name is \'%s\', but only masters or workers may be chiefs. ' 'Please check is_chief and job_name, which may have been set in ' 'TF_CONFIG environment variable.' % (self._job_name,)) - elif (self._is_chief is False and self._job_name == 'master' and - self.task == 0): - raise ValueError( - 'Master task 0 must be chief. Please check is_chief, job_name, and ' - 'task, which may have been set in TF_CONFIG environment variable.') + elif self._is_chief is False: + if environment == 'cloud': + if self._job_name == 'master' and self.task == 0: + raise ValueError( + 'Master task 0 must be chief for cloud. Please check is_chief, ' + 'job_name, and task, which may have been set in TF_CONFIG ' + 'environment variable.') + else: + if self._job_name == 'worker' and self.task == 0: + raise ValueError( + 'Worker task 0 must be chief. 
Please check is_chief, job_name, ' + 'and task, which may have been set in TF_CONFIG environment ' + 'variable.') self.evaluation_master = evaluation_master or '' diff --git a/tensorflow/contrib/learn/python/learn/tests/run_config_test.py b/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py similarity index 84% rename from tensorflow/contrib/learn/python/learn/tests/run_config_test.py rename to tensorflow/contrib/learn/python/learn/estimators/run_config_test.py index 4164b450452..b72c720aa6d 100644 --- a/tensorflow/contrib/learn/python/learn/tests/run_config_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py @@ -189,20 +189,47 @@ class RunConfigTest(tf.test.TestCase): # Basically, just make sure no exception is being raised. self.assertEquals(config.num_ps_replicas, 2) - def test_is_chief_from_tf_config(self): + def test_is_chief_from_cloud_tf_config(self): # is_chief should be true when ["task"]["type"] == "master" and - # index == 0. Note that test_values_from_tf_config covers the - # non-master case. + # index == 0 and ["task"]["environment"] == "cloud". Note that + # test_values_from_tf_config covers the non-master case. tf_config = {"cluster": {"ps": ["host1:1", "host2:2"], "master": ["host3:3"], "worker": ["host4:4", "host5:5", "host6:6"]}, "task": {"type": "master", - "index": 0}} + "index": 0}, + "environment": "cloud"} with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): config = run_config.RunConfig() self.assertTrue(config.is_chief) + def test_is_chief_from_noncloud_tf_config(self): + # is_chief should be true when ["task"]["type"] == "worker" and + # index == 0 if ["task"]["environment"] != "cloud". + tf_config = {"cluster": {"ps": ["host1:1", "host2:2"], + "master": ["host3:3"], + "worker": ["host4:4", "host5:5", "host6:6"]}, + "task": {"type": "worker", + "index": 0}, + "environment": "random"} + with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): + config = run_config.RunConfig() + + self.assertTrue(config.is_chief) + + # But task 0 for a job named "master" should not be. 
+ tf_config = {"cluster": {"ps": ["host1:1", "host2:2"], + "master": ["host3:3"], + "worker": ["host4:4", "host5:5", "host6:6"]}, + "task": {"type": "master", + "index": 0}, + "environment": "random"} + with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): + config = run_config.RunConfig() + + self.assertFalse(config.is_chief) + def test_default_is_chief_from_tf_config_without_job_name(self): tf_config = {"cluster": {}, "task": {}} @@ -245,8 +272,15 @@ class RunConfigTest(tf.test.TestCase): with self.assertRaisesRegexp(ValueError, msg): run_config.RunConfig(is_chief=True, task=0, job_name="ps") - with self.assertRaisesRegexp(ValueError, "Master task 0 must be chief"): - run_config.RunConfig(is_chief=False, task=0, job_name="master") + msg = "Master task 0 must be chief for cloud" + with self.assertRaisesRegexp(ValueError, msg): + tf_config = {"environment": "cloud"} + with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): + run_config.RunConfig(is_chief=False, task=0, job_name="master") + + msg = "Worker task 0 must be chief" + with self.assertRaisesRegexp(ValueError, msg): + run_config.RunConfig(is_chief=False, task=0, job_name="worker") if __name__ == "__main__": diff --git a/tensorflow/contrib/learn/python/learn/tests/stability_test.py b/tensorflow/contrib/learn/python/learn/estimators/stability_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/stability_test.py rename to tensorflow/contrib/learn/python/learn/estimators/stability_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index 84cf8ae71a4..25d0e79c737 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -55,7 +55,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable): method), should be set to (#concurrent train ops per worker) x (#workers). If num_loss_partitions is larger or equal to this value, convergence is guaranteed but becomes slower as num_loss_partitions increases. If it is set - to a smaller value, the optimizer is more agressive in reducing the global + to a smaller value, the optimizer is more aggressive in reducing the global loss but convergence is not guaranteed. The recommended value in tf.learn (where there is one process per worker) is the number of workers running the train steps. It defaults to 1 (single machine). 
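Stepping back to the run_config change above: is_chief now depends on the TF_CONFIG 'environment' field, so outside the 'cloud' environment worker task 0 acts as chief, while in 'cloud' the chief remains master task 0. A small sketch of the non-cloud case, mirroring the new test; the host addresses and the 'random' environment value are placeholders:

import json
import os
import tensorflow as tf

tf_config = {"cluster": {"ps": ["host1:1"],
                         "master": ["host2:2"],
                         "worker": ["host3:3", "host4:4"]},
             "task": {"type": "worker", "index": 0},
             "environment": "random"}  # any value other than "cloud"
os.environ["TF_CONFIG"] = json.dumps(tf_config)

config = tf.contrib.learn.RunConfig()
assert config.is_chief  # worker 0 is chief outside the cloud environment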
@@ -146,6 +146,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable): self._feature_columns = feature_columns self._model_dir = model_dir or tempfile.mkdtemp() + self._chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access self._estimator = estimator.Estimator( model_fn=linear.sdca_classifier_model_fn, model_dir=self._model_dir, @@ -155,12 +156,19 @@ class SVM(trainable.Trainable, evaluable.Evaluable): "optimizer": self._optimizer, "weight_column_name": weight_column_name, "loss_type": "hinge_loss", + "update_weights_hook": self._chief_hook, }, feature_engineering_fn=feature_engineering_fn) + if not self._estimator.config.is_chief: + self._chief_hook = None def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None, monitors=None, max_steps=None): """See trainable.Trainable.""" + if monitors is None: + monitors = [] + if self._chief_hook: + monitors.append(self._chief_hook) return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, batch_size=batch_size, monitors=monitors, max_steps=max_steps) diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index c10d014c142..8468d04e488 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -223,6 +223,7 @@ class Experiment(object): logging.info("Waiting %f secs before starting eval.", delay_secs) time.sleep(delay_secs) + last_fitted_error_time = 0 while True: start = time.time() try: @@ -231,7 +232,13 @@ class Experiment(object): metrics=self._eval_metrics, name=name) except NotFittedError: - logging.warning("Estimator is not fitted yet, skipping evaluation.") + # Print warning message every 10 mins. + if time.time() - last_fitted_error_time > 600: + logging.warning( + "Estimator is not fitted yet. " + "Will start an evaluation when a checkpoint will be ready.") + last_fitted_error_time = time.time() + duration = time.time() - start if duration < throttle_delay_secs: difference = throttle_delay_secs - duration diff --git a/tensorflow/contrib/learn/python/learn/tests/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/experiment_test.py rename to tensorflow/contrib/learn/python/learn/experiment_test.py diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index f74a6a204c5..583a12af8ca 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -126,6 +126,7 @@ def _monitored_train(graph, supervisor_save_model_secs=600, supervisor_save_model_steps=None, keep_checkpoint_max=5, + supervisor_save_summaries_secs=None, supervisor_save_summaries_steps=100, feed_fn=None, steps=None, @@ -164,7 +165,7 @@ def _monitored_train(graph, current loss. A `0` or negative value disables logging. supervisor_is_chief: Whether the current process is the chief supervisor in charge of restoring the model and running standard services. - supervisor_master: The master string to use when preparing the session. + supervisor_master: The master string to use when preparing the session. supervisor_save_model_secs: Save checkpoints every this many seconds. Can not be specified with `supervisor_save_model_steps`. supervisor_save_model_steps: Save checkpoints every this many steps. Can not @@ -173,8 +174,12 @@ def _monitored_train(graph, keep. 
As new files are created, older files are deleted. If None or 0, all checkpoint files are kept. This is simply passed as the max_to_keep arg to `tf.Saver` constructor. + supervisor_save_summaries_secs: Save summaries every + `supervisor_save_summaries_secs` seconds when training. supervisor_save_summaries_steps: Save summaries every - `supervisor_save_summaries_steps` seconds when training. + `supervisor_save_summaries_steps` steps when training. Exactly one of + `supervisor_save_summaries_steps` and `supervisor_save_summaries_secs` should be + specified, and the other should be None. feed_fn: A function that is called every iteration to produce a `feed_dict` passed to `session.run` calls. Optional. steps: Trains for this many steps (e.g. current global step + `steps`). @@ -267,6 +272,7 @@ def _monitored_train(graph, summary_writer=summary_writer)) all_hooks.append( basic_session_run_hooks.SummarySaverHook( + save_secs=supervisor_save_summaries_secs, save_steps=supervisor_save_summaries_steps, summary_writer=summary_writer, scaffold=scaffold)) diff --git a/tensorflow/contrib/learn/python/learn/tests/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/graph_actions_test.py rename to tensorflow/contrib/learn/python/learn/graph_actions_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/grid_search_test.py b/tensorflow/contrib/learn/python/learn/grid_search_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/grid_search_test.py rename to tensorflow/contrib/learn/python/learn/grid_search_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/data_feeder_test.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/data_feeder_test.py rename to tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index c41bcf45a1a..21ce65b7eb4 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -28,15 +28,33 @@ from tensorflow.python.ops import io_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables as var_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner + # Default name for key in the feature dict. KEY_FEATURE_NAME = '__key__' +def _check_enqueue_params(num_queue_runners, num_enqueue_threads): + """Check enqueue parameters for deprecation of `num_queue_runners`.""" + if num_queue_runners is not None: + # TODO(yifanchen): Remove on Nov 21 2016.
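For orientation, the deprecation check that begins here is what lets callers of the reader functions below switch from num_queue_runners to num_enqueue_threads, which uses a single coordinated QueueRunner with several enqueue threads. A hedged usage sketch with tf.contrib.learn.read_keyed_batch_features; the file pattern, feature spec, and the TFRecordReader choice are placeholders:

import tensorflow as tf

features = {"sequence": tf.FixedLenFeature([], tf.string)}

# num_enqueue_threads replaces the deprecated num_queue_runners.
_, parsed = tf.contrib.learn.read_keyed_batch_features(
    "/tmp/examples-*",  # placeholder file pattern
    batch_size=128,
    features=features,
    reader=tf.TFRecordReader,
    randomize_input=False,
    num_epochs=1,
    num_enqueue_threads=2)

with tf.Session() as session:
  # num_epochs creates local variables that must be initialized.
  session.run(tf.initialize_local_variables())
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(session, coord=coord)
  try:
    while not coord.should_stop():
      batch = session.run(parsed)
  except tf.errors.OutOfRangeError:
    pass
  finally:
    coord.request_stop()
    coord.join(threads)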
+ logging.warning('`num_queue_runners` is deprecated, it will be removed on ' + 'Nov 21 2016') + if num_enqueue_threads is not None: + raise ValueError('`num_queue_runners` and `num_enqueue_threads` can not ' + 'both be set.') + elif num_enqueue_threads is None: + logging.warning('Default behavior will change and `num_queue_runners` ' + 'will be replaced by `num_enqueue_threads`.') + num_queue_runners = 2 + return num_queue_runners, num_enqueue_threads + + def read_batch_examples(file_pattern, batch_size, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, num_threads=1, @@ -134,7 +152,107 @@ def read_keyed_batch_examples( Raises: ValueError: for invalid inputs. """ - # Retrieve files to read. + return _read_keyed_batch_examples_helper( + file_pattern, + batch_size, + reader, + randomize_input, + num_epochs, + queue_capacity, + num_threads, + read_batch_size, + parse_fn, + setup_shared_queue=False, + name=name) + + +def _read_keyed_batch_examples_shared_queue(file_pattern, + batch_size, + reader, + randomize_input=True, + num_epochs=None, + queue_capacity=10000, + num_threads=1, + read_batch_size=1, + parse_fn=None, + name=None): + """Adds operations to read, queue, batch `Example` protos. + + Given file pattern (or list of files), will setup a shared queue for file + names, setup a worker queue that pulls from the shared queue, read `Example` + protos using provided `reader`, use batch queue to create batches of examples + of size `batch_size`. This provides at most once visit guarantees. Note that + this only works if the parameter servers are not pre-empted or restarted or + the session is not restored from a checkpoint since the state of a queue + is not checkpointed and we will end up restarting from the entire list of + files. + + All queue runners are added to the queue runners collection, and may be + started via `start_queue_runners`. + + All ops are added to the default graph. + + Use `parse_fn` if you need to do parsing / processing on single examples. + + Args: + file_pattern: List of files or pattern of file paths containing + `Example` records. See `tf.gfile.Glob` for pattern rules. + batch_size: An int or scalar `Tensor` specifying the batch size to use. + reader: A function or class that returns an object with + `read` method, (filename tensor) -> (example tensor). + randomize_input: Whether the input should be randomized. + num_epochs: Integer specifying the number of times to read through the + dataset. If `None`, cycles through the dataset forever. + NOTE - If specified, creates a variable that must be initialized, so call + `tf.initialize_all_variables()` as shown in the tests. + queue_capacity: Capacity for input queue. + num_threads: The number of threads enqueuing examples. + read_batch_size: An int or scalar `Tensor` specifying the number of + records to read at once + parse_fn: Parsing function, takes `Example` Tensor returns parsed + representation. If `None`, no parsing is done. + name: Name of resulting op. + + Returns: + Returns tuple of: + - `Tensor` of string keys. + - String `Tensor` of batched `Example` proto. + + Raises: + ValueError: for invalid inputs. 
+ """ + return _read_keyed_batch_examples_helper( + file_pattern, + batch_size, + reader, + randomize_input, + num_epochs, + queue_capacity, + num_threads, + read_batch_size, + parse_fn, + setup_shared_queue=True, + name=name) + + +def _get_shared_file_name_queue(file_names, shuffle, num_epochs, name): + # Creating a dummy variable so we can put the shared queue in ps if there is + # a PS and in the worker otherwise. TODO(rohanj): Figure out how to place an + # op on PS without this hack + with ops.Graph().as_default(): + dummy_var = var_ops.Variable(initial_value=0, name='dummy_var') + with ops.device(dummy_var.device): + shared_file_name_queue = input_ops.string_input_producer( + constant_op.constant( + file_names, name='input'), + shuffle=shuffle, + num_epochs=num_epochs, + name=name, + shared_name=name) + return shared_file_name_queue + + +def _get_file_names(file_pattern, randomize_input): if isinstance(file_pattern, list): file_names = file_pattern if not file_names: @@ -148,6 +266,46 @@ def read_keyed_batch_examples( # in `string_input_producer` if `randomize_input` is enabled. if not randomize_input: file_names = sorted(file_names) + return file_names + + +def _get_examples(file_name_queue, reader, num_threads, read_batch_size, + parse_fn): + with ops.name_scope('read'): + example_list = [] + for _ in range(num_threads): + if read_batch_size > 1: + keys, examples_proto = reader().read_up_to(file_name_queue, + read_batch_size) + else: + keys, examples_proto = reader().read(file_name_queue) + if parse_fn: + parsed_examples = parse_fn(examples_proto) + # Map keys into example map because batch_join doesn't support + # tuple of Tensor + dict. + if isinstance(parsed_examples, dict): + parsed_examples[KEY_FEATURE_NAME] = keys + example_list.append(parsed_examples) + else: + example_list.append((keys, parsed_examples)) + else: + example_list.append((keys, examples_proto)) + return example_list + + +def _read_keyed_batch_examples_helper(file_pattern, + batch_size, + reader, + randomize_input=True, + num_epochs=None, + queue_capacity=10000, + num_threads=1, + read_batch_size=1, + parse_fn=None, + setup_shared_queue=False, + name=None): + # Retrieve files to read. + file_names = _get_file_names(file_pattern, randomize_input) # Check input parameters are given and reasonable. if (not queue_capacity) or (queue_capacity <= 0): @@ -168,33 +326,25 @@ def read_keyed_batch_examples( raise ValueError('Invalid num_epochs %s.' % num_epochs) with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope: - # Setup filename queue with shuffling. with ops.name_scope('file_name_queue') as file_name_queue_scope: - file_name_queue = input_ops.string_input_producer( - constant_op.constant(file_names, name='input'), - shuffle=randomize_input, num_epochs=num_epochs, - name=file_name_queue_scope) + if setup_shared_queue: + shared_file_name_queue = _get_shared_file_name_queue( + file_names, randomize_input, num_epochs, file_name_queue_scope) + file_name_queue = data_flow_ops.FIFOQueue( + capacity=1, dtypes=[dtypes.string], shapes=[[]]) + enqueue_op = file_name_queue.enqueue(shared_file_name_queue.dequeue()) + queue_runner.add_queue_runner( + queue_runner.QueueRunner(file_name_queue, [enqueue_op])) + else: + file_name_queue = input_ops.string_input_producer( + constant_op.constant( + file_names, name='input'), + shuffle=randomize_input, + num_epochs=num_epochs, + name=file_name_queue_scope) - # Create readers, one per thread and set them to read from filename queue. 
- with ops.name_scope('read'): - example_list = [] - for _ in range(num_threads): - if read_batch_size > 1: - keys, examples_proto = reader().read_up_to(file_name_queue, - read_batch_size) - else: - keys, examples_proto = reader().read(file_name_queue) - if parse_fn: - parsed_examples = parse_fn(examples_proto) - # Map keys into example map because batch_join doesn't support - # tuple of Tensor + dict. - if isinstance(parsed_examples, dict): - parsed_examples[KEY_FEATURE_NAME] = keys - example_list.append(parsed_examples) - else: - example_list.append((keys, parsed_examples)) - else: - example_list.append((keys, examples_proto)) + example_list = _get_examples(file_name_queue, reader, num_threads, + read_batch_size, parse_fn) enqueue_many = read_batch_size > 1 @@ -234,7 +384,8 @@ def read_keyed_batch_features(file_pattern, queue_capacity=10000, reader_num_threads=1, feature_queue_capacity=100, - num_queue_runners=2, + num_queue_runners=None, + num_enqueue_threads=None, parse_fn=None, name=None): """Adds operations to read, queue, batch and parse `Example` protos. @@ -265,10 +416,17 @@ def read_keyed_batch_features(file_pattern, queue_capacity: Capacity for input queue. reader_num_threads: The number of threads to read examples. feature_queue_capacity: Capacity of the parsed features queue. - num_queue_runners: Number of queue runners to start for the feature queue, - Adding multiple queue runners for the parsed example queue helps maintain + num_queue_runners: Deprecated. Defaults to 2 if this and + `num_enqueue_threads` are both `None`. This is the number of queue + runners to start for the feature queue. Adding multiple queue runners for + the parsed example queue helps maintain a full queue when the subsequent + computations overall are cheaper than parsing. This argument will be + deprecated and replaced with `num_enqueue_threads`. + num_enqueue_threads: Number of threads to enqueue the parsed example queue. + Using multiple threads to enqueue the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than - parsing. + parsing. This argument will replace `num_queue_runners`. This and + `num_queue_runners` can not both be set. parse_fn: Parsing function, takes `Example` Tensor returns parsed representation. If `None`, no parsing is done. name: Name of resulting op. @@ -282,6 +440,9 @@ def read_keyed_batch_features(file_pattern, ValueError: for invalid inputs. """ + num_queue_runners, num_enqueue_threads = _check_enqueue_params( + num_queue_runners, num_enqueue_threads) + with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: keys, examples = read_keyed_batch_examples( file_pattern, batch_size, reader, randomize_input=randomize_input, @@ -290,6 +451,88 @@ def read_keyed_batch_features(file_pattern, parse_fn=parse_fn, name=scope) # Parse the example. feature_map = parsing_ops.parse_example(examples, features) + return queue_parsed_features( + feature_map, + keys=keys, + feature_queue_capacity=feature_queue_capacity, + num_queue_runners=num_queue_runners, + num_enqueue_threads=num_enqueue_threads, + name=scope) + + +def _read_keyed_batch_features_shared_queue(file_pattern, + batch_size, + features, + reader, + randomize_input=True, + num_epochs=None, + queue_capacity=10000, + reader_num_threads=1, + feature_queue_capacity=100, + num_queue_runners=2, + parse_fn=None, + name=None): + """Adds operations to read, queue, batch and parse `Example` protos. 
+ + Given file pattern (or list of files), will setup a shared queue for file + names, setup a worker queue that gets filenames from the shared queue, + read `Example` proto using provided `reader`, use batch queue to create + batches of examples of size `batch_size` and parse example given `features` + specification. + + All queue runners are added to the queue runners collection, and may be + started via `start_queue_runners`. + + All ops are added to the default graph. + + Args: + file_pattern: List of files or pattern of file paths containing + `Example` records. See `tf.gfile.Glob` for pattern rules. + batch_size: An int or scalar `Tensor` specifying the batch size to use. + features: A `dict` mapping feature keys to `FixedLenFeature` or + `VarLenFeature` values. + reader: A function or class that returns an object with + `read` method, (filename tensor) -> (example tensor). + randomize_input: Whether the input should be randomized. + num_epochs: Integer specifying the number of times to read through the + dataset. If None, cycles through the dataset forever. NOTE - If specified, + creates a variable that must be initialized, so call + tf.initialize_local_variables() as shown in the tests. + queue_capacity: Capacity for input queue. + reader_num_threads: The number of threads to read examples. + feature_queue_capacity: Capacity of the parsed features queue. + num_queue_runners: Number of queue runners to start for the feature queue. + Adding multiple queue runners for the parsed example queue helps maintain + a full queue when the subsequent computations overall are cheaper than + parsing. + parse_fn: Parsing function, takes `Example` Tensor returns parsed + representation. If `None`, no parsing is done. + name: Name of resulting op. + + Returns: + Returns tuple of: + - `Tensor` of string keys. + - A dict of `Tensor` or `SparseTensor` objects for each in `features`. + + Raises: + ValueError: for invalid inputs. + """ + + with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: + keys, examples = _read_keyed_batch_examples_shared_queue( + file_pattern, + batch_size, + reader, + randomize_input=randomize_input, + num_epochs=num_epochs, + queue_capacity=queue_capacity, + num_threads=reader_num_threads, + read_batch_size=batch_size, + parse_fn=parse_fn, + name=scope) + # Parse the example. + feature_map = parsing_ops.parse_example(examples, features) return queue_parsed_features( feature_map, keys=keys, @@ -301,7 +544,8 @@ def read_keyed_batch_features(file_pattern, def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, - num_queue_runners=2, + num_queue_runners=None, + num_enqueue_threads=None, name=None): """Speeds up parsing by using queues to do it asynchronously. @@ -320,10 +564,17 @@ def queue_parsed_features(parsed_features, parsed_features: A dict of string key to `Tensor` or `SparseTensor` objects. keys: `Tensor` of string keys. feature_queue_capacity: Capacity of the parsed features queue. - num_queue_runners: Number of queue runners to start for the feature queue, - Adding multiple queue runners for the parsed example queue helps maintain + num_queue_runners: Deprecated. Defaults to 2 if this and + `num_enqueue_threads` are both `None`. This is the number of queue + runners to start for the feature queue. Adding multiple queue runners for + the parsed example queue helps maintain a full queue when the subsequent + computations overall are cheaper than parsing.
This argument will be + deprecated and replaced with `num_enqueue_threads`. + num_enqueue_threads: Number of threads to enqueue the parsed example queue. + Using multiple threads to enqueue the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than - parsing. + parsing. This argument will replace `num_queue_runners`. This and + `num_queue_runners` can not both be set. name: Name of resulting op. Returns: @@ -331,7 +582,12 @@ def queue_parsed_features(parsed_features, - `Tensor` corresponding to `keys` if provided, otherwise `None`. - A dict of string key to `Tensor` or `SparseTensor` objects corresponding to `parsed_features`. + Raises: + ValueError: for invalid inputs. """ + num_queue_runners, num_enqueue_threads = _check_enqueue_params( + num_queue_runners, num_enqueue_threads) + args = list(parsed_features.values()) if keys is not None: args += [keys] @@ -370,12 +626,31 @@ def queue_parsed_features(parsed_features, # Add multiple queue runners so that the queue is always full. Adding more # than two queue-runners may hog the cpu on the worker to fill up the queue. - for _ in range(num_queue_runners): - queue_runner.add_queue_runner( - queue_runner.QueueRunner( - input_queue, [input_queue.enqueue(tensors_to_enqueue)], - queue_closed_exception_types=(errors.OutOfRangeError, - errors.CancelledError))) + # + # Note: this can result in large last batch being lost as the multiple queue + # runner threads do not coordinate with each other. Please use + # `num_enqueue_threads` instead. + if num_queue_runners is not None: + for _ in range(num_queue_runners): + queue_runner.add_queue_runner( + queue_runner.QueueRunner( + input_queue, [input_queue.enqueue(tensors_to_enqueue)], + queue_closed_exception_types=(errors.OutOfRangeError, + errors.CancelledError))) + # Use a single QueueRunner with multiple threads to enqueue so the queue is + # always full. The threads are coordinated so the last batch will not be + # lost. 
+ elif num_enqueue_threads is not None: + enqueue_ops = [input_queue.enqueue(tensors_to_enqueue) + for _ in range(num_enqueue_threads)] + queue_runner.add_queue_runner(queue_runner.QueueRunner( + input_queue, enqueue_ops, + queue_closed_exception_types=(errors.OutOfRangeError, + errors.CancelledError))) + else: + raise AssertionError( + 'Either `num_queue_runners` or `num_enqueue_threads` should have ' + 'been set.') dequeued_tensors = input_queue.dequeue() diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py index c1b1c409454..a0c143e9bb5 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py @@ -19,13 +19,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import base64 import os import random import tempfile +from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf +from tensorflow.contrib.learn.python.learn.learn_io.graph_io import _read_keyed_batch_examples_shared_queue from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.platform import gfile @@ -243,6 +247,63 @@ class GraphIOTest(tf.test.TestCase): coord.request_stop() + def test_read_keyed_batch_features_mutual_exclusive_args(self): + filename = self._create_temp_file("abcde") + features = {"sequence": tf.FixedLenFeature([], tf.string)} + with self.assertRaisesRegexp(ValueError, "can not both be set"): + _, _ = tf.contrib.learn.read_keyed_batch_features( + filename, 1, features, tf.TextLineReader, randomize_input=False, + num_queue_runners=2, num_enqueue_threads=2) + + def test_queue_parsed_features_mutual_exclusive_args(self): + parsed_features = {"a": tf.constant([10, 20, 30])} + with self.assertRaisesRegexp(ValueError, "can not both be set"): + _, _ = tf.contrib.learn.queue_parsed_features( + parsed_features, num_queue_runners=2, num_enqueue_threads=2) + + def test_read_text_lines_large(self): + gfile.Glob = self._orig_glob + sequence_prefix = "abcdefghijklmnopqrstuvwxyz123456789" + num_records = 49999 + lines = ["".join([sequence_prefix, str(l)]).encode("ascii") + for l in xrange(num_records)] + json_lines = ["".join(['{"features": { "feature": { "sequence": {', + '"bytes_list": { "value": ["', + base64.b64encode(l).decode("ascii"), + '"]}}}}}\n']) for l in lines] + filename = self._create_temp_file("".join(json_lines)) + batch_size = 10000 + queue_capacity = 10000 + name = "my_large_batch" + + features = {"sequence": tf.FixedLenFeature([], tf.string)} + + with tf.Graph().as_default() as g, self.test_session(graph=g) as session: + _, result = tf.contrib.learn.read_keyed_batch_features( + filename, batch_size, features, tf.TextLineReader, + randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, + num_enqueue_threads=2, parse_fn=tf.decode_json_example, name=name) + session.run(tf.initialize_local_variables()) + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(session, coord=coord) + + data = [] + try: + while not coord.should_stop(): + data.append(session.run(result)) + except errors.OutOfRangeError: + pass + finally: + coord.request_stop() + + coord.join(threads) + parsed_records = [item for sublist in [d["sequence"] for d in data] + for item in sublist] + # Check that the number of records matches expected and 
all records + # are present. + self.assertEqual(len(parsed_records), num_records) + self.assertEqual(set(parsed_records), set(lines)) + def test_read_text_lines_multifile(self): gfile.Glob = self._orig_glob filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"]) @@ -261,6 +322,18 @@ class GraphIOTest(tf.test.TestCase): coord = tf.train.Coordinator() tf.train.start_queue_runners(session, coord=coord) + self.assertEqual("%s:1" % name, inputs.name) + file_name_queue_name = "%s/file_name_queue" % name + file_names_name = "%s/input" % file_name_queue_name + example_queue_name = "%s/fifo_queue" % name + test_util.assert_ops_in_graph({ + file_names_name: "Const", + file_name_queue_name: "FIFOQueue", + "%s/read/TextLineReader" % name: "TextLineReader", + example_queue_name: "FIFOQueue", + name: "QueueDequeueUpTo" + }, g) + self.assertAllEqual(session.run(inputs), [b"ABC"]) self.assertAllEqual(session.run(inputs), [b"DEF"]) self.assertAllEqual(session.run(inputs), [b"GHK"]) @@ -269,6 +342,120 @@ class GraphIOTest(tf.test.TestCase): coord.request_stop() + def test_read_text_lines_multifile_with_shared_queue(self): + gfile.Glob = self._orig_glob + filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"]) + + batch_size = 1 + queue_capacity = 5 + name = "my_batch" + + with tf.Graph().as_default() as g, self.test_session(graph=g) as session: + _, inputs = _read_keyed_batch_examples_shared_queue( + filenames, + batch_size, + reader=tf.TextLineReader, + randomize_input=False, + num_epochs=1, + queue_capacity=queue_capacity, + name=name) + session.run(tf.initialize_local_variables()) + + coord = tf.train.Coordinator() + tf.train.start_queue_runners(session, coord=coord) + + self.assertEqual("%s:1" % name, inputs.name) + shared_file_name_queue_name = "%s/file_name_queue" % name + file_names_name = "%s/input" % shared_file_name_queue_name + example_queue_name = "%s/fifo_queue" % name + worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name + test_util.assert_ops_in_graph({ + file_names_name: "Const", + shared_file_name_queue_name: "FIFOQueue", + "%s/read/TextLineReader" % name: "TextLineReader", + example_queue_name: "FIFOQueue", + worker_file_name_queue_name: "FIFOQueue", + name: "QueueDequeueUpTo" + }, g) + + self.assertAllEqual(session.run(inputs), [b"ABC"]) + self.assertAllEqual(session.run(inputs), [b"DEF"]) + self.assertAllEqual(session.run(inputs), [b"GHK"]) + with self.assertRaises(errors.OutOfRangeError): + session.run(inputs) + + coord.request_stop() + + def _get_qr(self, name): + for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS): + if qr.name == name: + return qr + + def _run_queue(self, name, session): + qr = self._get_qr(name) + for op in qr.enqueue_ops: + session.run(op) + + def test_multiple_workers_with_shared_queue(self): + gfile.Glob = self._orig_glob + filenames = self._create_sorted_temp_files([ + "ABC\n", "DEF\n", "GHI\n", "JKL\n", "MNO\n", "PQR\n", "STU\n", "VWX\n", + "YZ\n" + ]) + + batch_size = 1 + queue_capacity = 5 + name = "my_batch" + shared_file_name_queue_name = "%s/file_name_queue" % name + example_queue_name = "%s/fifo_queue" % name + worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name + + server = tf.train.Server.create_local_server() + + with tf.Graph().as_default() as g1, tf.Session( + server.target, graph=g1) as session: + _, inputs = _read_keyed_batch_examples_shared_queue( + filenames, + batch_size, + reader=tf.TextLineReader, + randomize_input=False, + num_epochs=1, + queue_capacity=queue_capacity, + 
name=name) + session.run(tf.initialize_local_variables()) + + # Run the three queues once manually. + self._run_queue(shared_file_name_queue_name, session) + self._run_queue(worker_file_name_queue_name, session) + self._run_queue(example_queue_name, session) + + self.assertAllEqual(session.run(inputs), [b"ABC"]) + + # Run the worker and the example queue. + self._run_queue(worker_file_name_queue_name, session) + self._run_queue(example_queue_name, session) + + self.assertAllEqual(session.run(inputs), [b"DEF"]) + + with tf.Graph().as_default() as g2, tf.Session( + server.target, graph=g2) as session: + _, inputs = _read_keyed_batch_examples_shared_queue( + filenames, + batch_size, + reader=tf.TextLineReader, + randomize_input=False, + num_epochs=1, + queue_capacity=queue_capacity, + name=name) + + # Run the worker and the example queue. + self._run_queue(worker_file_name_queue_name, session) + self._run_queue(example_queue_name, session) + + self.assertAllEqual(session.run(inputs), [b"GHI"]) + + self.assertTrue(g1 is not g2) + def test_batch_text_lines(self): gfile.Glob = self._orig_glob filename = self._create_temp_file("A\nB\nC\nD\nE\n") diff --git a/tensorflow/contrib/learn/python/learn/tests/io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/io_test.py similarity index 97% rename from tensorflow/contrib/learn/python/learn/tests/io_test.py rename to tensorflow/contrib/learn/python/learn/learn_io/io_test.py index 80732337f97..a299cadaae9 100644 --- a/tensorflow/contrib/learn/python/learn/tests/io_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/io_test.py @@ -45,7 +45,7 @@ class IOTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(data), n_classes=3) classifier.fit(data, labels, steps=100) - score = accuracy_score(labels[0], classifier.predict(data)) + score = accuracy_score(labels[0], list(classifier.predict(data))) self.assertGreater(score, 0.5, "Failed with score = {0}".format(score)) else: print("No pandas installed. 
pandas-related tests are skipped.") @@ -61,7 +61,7 @@ class IOTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(data), n_classes=3) classifier.fit(data, labels, steps=100) - score = accuracy_score(labels, classifier.predict(data)) + score = accuracy_score(labels, list(classifier.predict(data))) self.assertGreater(score, 0.5, "Failed with score = {0}".format(score)) def test_string_data_formats(self): diff --git a/tensorflow/contrib/learn/python/learn/tests/learn_runner_test.py b/tensorflow/contrib/learn/python/learn/learn_runner_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/learn_runner_test.py rename to tensorflow/contrib/learn/python/learn/learn_runner_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py b/tensorflow/contrib/learn/python/learn/metric_spec_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py rename to tensorflow/contrib/learn/python/learn/metric_spec_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/monitors_test.py b/tensorflow/contrib/learn/python/learn/monitors_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/monitors_test.py rename to tensorflow/contrib/learn/python/learn/monitors_test.py diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index 7662191a5e2..f1163ee02cb 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -33,7 +33,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): predictions = nn.xw_plus_b(tensor_in, weights, biases) if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2: predictions = array_ops_.squeeze(predictions, squeeze_dims=[1]) - return predictions, loss_ops.sum_of_squares(predictions, labels) + return predictions, loss_ops.mean_squared_error(predictions, labels) def softmax_classifier(tensor_in, diff --git a/tensorflow/contrib/learn/python/learn/ops/tests/ops_test.py b/tensorflow/contrib/learn/python/learn/ops/ops_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/ops/tests/ops_test.py rename to tensorflow/contrib/learn/python/learn/ops/ops_test.py diff --git a/tensorflow/contrib/learn/python/learn/ops/tests/seq2seq_ops_test.py b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/ops/tests/seq2seq_ops_test.py rename to tensorflow/contrib/learn/python/learn/ops/seq2seq_ops_test.py diff --git a/tensorflow/contrib/learn/python/learn/ops/tests/__init__.py b/tensorflow/contrib/learn/python/learn/ops/tests/__init__.py deleted file mode 100644 index 7376211abfe..00000000000 --- a/tensorflow/contrib/learn/python/learn/ops/tests/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Sequence-to-sequence tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/tests/basic_session_run_hooks_test.py b/tensorflow/contrib/learn/python/learn/tests/basic_session_run_hooks_test.py deleted file mode 100644 index 73988c01234..00000000000 --- a/tensorflow/contrib/learn/python/learn/tests/basic_session_run_hooks_test.py +++ /dev/null @@ -1,100 +0,0 @@ -# pylint: disable=g-bad-file-header -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for basic_session_run_hooks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import shutil -import tempfile -import time - -import tensorflow as tf - -from tensorflow.contrib import testing -from tensorflow.python.training import monitored_session - - -class StepCounterHookTest(tf.test.TestCase): - - def setUp(self): - self.log_dir = tempfile.mkdtemp() - - def tearDown(self): - shutil.rmtree(self.log_dir, ignore_errors=True) - - def test_step_counter(self): - with tf.Graph().as_default() as g, tf.Session() as sess: - global_step = tf.contrib.framework.get_or_create_global_step() - train_op = tf.assign_add(global_step, 1) - summary_writer = testing.FakeSummaryWriter(self.log_dir, g) - hook = tf.train.StepCounterHook( - summary_writer=summary_writer, every_n_steps=10) - hook.begin() - sess.run(tf.initialize_all_variables()) - mon_sess = monitored_session._HookedSession(sess, [hook]) - for _ in range(30): - time.sleep(0.01) - mon_sess.run(train_op) - hook.end(sess) - summary_writer.assert_summaries( - test_case=self, - expected_logdir=self.log_dir, - expected_graph=g, - expected_summaries={}) - for step in [11, 21]: - summary_value = summary_writer.summaries[step][0].value[0] - self.assertTrue(summary_value.tag, 'global_step/sec') - # check at least 10 steps per sec is recorded. 
- self.assertGreater(summary_value.simple_value, 10) - - -class SummarySaverHookTest(tf.test.TestCase): - - def test_summary_saver(self): - with tf.Graph().as_default() as g, tf.Session() as sess: - log_dir = 'log/dir' - summary_writer = testing.FakeSummaryWriter(log_dir, g) - var = tf.Variable(0.0) - tensor = tf.assign_add(var, 1.0) - summary_op = tf.scalar_summary('my_summary', tensor) - global_step = tf.contrib.framework.get_or_create_global_step() - train_op = tf.assign_add(global_step, 1) - hook = tf.train.SummarySaverHook( - summary_op=summary_op, save_steps=8, summary_writer=summary_writer) - hook.begin() - sess.run(tf.initialize_all_variables()) - mon_sess = monitored_session._HookedSession(sess, [hook]) - for i in range(30): - _ = i - mon_sess.run(train_op) - hook.end(sess) - summary_writer.assert_summaries( - test_case=self, - expected_logdir=log_dir, - expected_graph=g, - expected_summaries={ - 1: {'my_summary': 1.0}, - 9: {'my_summary': 2.0}, - 17: {'my_summary': 3.0}, - 25: {'my_summary': 4.0}, - }) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/tests/summary_writer_cache_test.py b/tensorflow/contrib/learn/python/learn/tests/summary_writer_cache_test.py deleted file mode 100644 index 2df47a75ba5..00000000000 --- a/tensorflow/contrib/learn/python/learn/tests/summary_writer_cache_test.py +++ /dev/null @@ -1,78 +0,0 @@ -# pylint: disable=g-bad-file-header -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Runner.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import glob -import os - -import tensorflow as tf - -from tensorflow.contrib.learn.python.learn import summary_writer_cache - - -class SummaryWriterCacheTest(tf.test.TestCase): - """SummaryWriterCache tests.""" - - def _test_dir(self, test_name): - """Create an empty dir to use for tests. - - Args: - test_name: Name of the test. - - Returns: - Absolute path to the test directory. 
- """ - test_dir = os.path.join(self.get_temp_dir(), test_name) - if os.path.isdir(test_dir): - for f in glob.glob('%s/*' % test_dir): - os.remove(f) - else: - os.makedirs(test_dir) - return test_dir - - def test_cache(self): - with tf.Graph().as_default(): - dir1 = self._test_dir('test_cache_1') - dir2 = self._test_dir('test_cache_2') - sw1 = summary_writer_cache.SummaryWriterCache.get(dir1) - sw2 = summary_writer_cache.SummaryWriterCache.get(dir2) - sw3 = summary_writer_cache.SummaryWriterCache.get(dir1) - self.assertEqual(sw1, sw3) - self.assertFalse(sw1 == sw2) - sw1.close() - sw2.close() - events1 = glob.glob(os.path.join(dir1, 'event*')) - self.assertTrue(events1) - events2 = glob.glob(os.path.join(dir2, 'event*')) - self.assertTrue(events2) - events3 = glob.glob(os.path.join('nowriter', 'event*')) - self.assertFalse(events3) - - def test_clear(self): - with tf.Graph().as_default(): - dir1 = self._test_dir('test_clear') - sw1 = summary_writer_cache.SummaryWriterCache.get(dir1) - summary_writer_cache.SummaryWriterCache.clear() - sw2 = summary_writer_cache.SummaryWriterCache.get(dir1) - self.assertFalse(sw1 == sw2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index 00ad08dc110..5313dd3a4ea 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.contrib.framework.python.ops import variables as contrib_variables @@ -312,21 +311,10 @@ def _export_estimator(estimator, predictions) else: try: - # Some estimators provide a target_column of known type - target_column = estimator._get_target_column() - problem_type = target_column.problem_type - - if problem_type == layers.ProblemType.CLASSIFICATION: - signature_fn = classification_signature_fn - elif problem_type == layers.ProblemType.LINEAR_REGRESSION: - signature_fn = regression_signature_fn - elif problem_type == layers.ProblemType.LOGISTIC_REGRESSION: - signature_fn = logistic_regression_signature_fn - else: - raise ValueError( - 'signature_fn must be provided because the TargetColumn is a %s, ' - 'which does not have a standard problem type and so cannot use a ' - 'standard export signature.' % type(target_column).__name__) + # Some estimators provide a signature function. 
+ # TODO(zakaria): check if the estimator has this function, + # raise helpful error if not + signature_fn = estimator._create_signature_fn() default_signature, named_graph_signatures = ( signature_fn(examples, features, predictions)) diff --git a/tensorflow/contrib/learn/python/learn/utils/export_test.py b/tensorflow/contrib/learn/python/learn/utils/export_test.py index 54e3e8962f7..0f1c7e6d807 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export_test.py +++ b/tensorflow/contrib/learn/python/learn/utils/export_test.py @@ -47,6 +47,28 @@ class ExportTest(tf.test.TestCase): default_signature = signatures.default_signature return default_signature + def testExportMonitor_EstimatorProvidesSignature(self): + random.seed(42) + x = np.random.rand(1000) + y = 2 * x + 3 + cont_features = [tf.contrib.layers.real_valued_column('', dimension=1)] + regressor = learn.LinearRegressor(feature_columns=cont_features) + export_dir = tempfile.mkdtemp() + 'export/' + export_monitor = learn.monitors.ExportMonitor( + every_n_steps=1, export_dir=export_dir, exports_to_keep=2) + regressor.fit(x, y, steps=10, + monitors=[export_monitor]) + + self.assertTrue(tf.gfile.Exists(export_dir)) + # Only the written checkpoints are exported. + self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export')) + self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export')) + self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir, + '00000010')) + # Validate the signature + signature = self._get_default_signature(export_dir + '00000010/export.meta') + self.assertTrue(signature.HasField('regression_signature')) + def testExportMonitor(self): random.seed(42) x = np.random.rand(1000) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 466d1aac51e..b749cd18664 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -189,6 +189,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. # regularization and still have a sum that is quite close to the @@ -248,6 +249,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for t in threads: t.join() + lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. @@ -294,6 +296,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # There is neither L1 nor L2 loss, so regularized and unregularized # losses should be exactly the same. 
@@ -346,6 +349,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) @@ -416,6 +420,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.226487 + 0.102902, unregularized_loss.eval(), @@ -456,6 +461,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08) self.assertAllClose(0.408044, loss.eval(), atol=0.012) @@ -494,6 +500,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) @@ -580,6 +587,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # Predictions should be 2/3 of label due to minimizing regularized loss: # (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 @@ -626,6 +634,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # Predictions should be 1/5 of label due to minimizing regularized loss: # (label - 2 * weight)^2 + L2 * 16 * weight^2 @@ -661,6 +670,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # Predictions should be -4.0, 48/5 due to minimizing regularized loss: # (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight @@ -696,6 +706,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # There are 4 (sparse) variable weights to be learned. 2 for age and 2 for # gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1, @@ -729,6 +740,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # The loss function for these particular features is given by: # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). 
So, @@ -759,6 +771,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # The loss function for these particular features is given by: # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 + @@ -816,6 +829,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() binary_predictions = get_binary_predictions_for_hinge(predictions) self.assertAllEqual([-1.0, 1.0], predictions.eval()) @@ -841,6 +855,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05) self.assertAllEqual([1, 0], binary_predictions.eval()) @@ -871,6 +886,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints # are within the margins so there is unregularized loss (1/2 per example). @@ -899,6 +915,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will # try to increase the margin from (1.0, 0.5). Due to regularization, @@ -953,6 +970,7 @@ class SdcaWithSmoothHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() binary_predictions = get_binary_predictions_for_hinge(predictions) self.assertAllClose([-0.67, 0.67], predictions.eval(), atol=0.05) diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index ad7c09e18f3..6c3886d247e 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -278,7 +278,8 @@ class SdcaModel(object): ```python # Create a solver with the desired parameters. lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options) - opt_op = lr.minimize() + min_op = lr.minimize() + opt_op = lr.update_weights(min_op) predictions = lr.predictions(examples) # Primal loss + L1 loss + L2 loss. @@ -565,35 +566,46 @@ class SdcaModel(object): for w, u in zip(self._slots['unshrinked_dense_features_weights'], dfw): update_ops.append(w.assign_add(u)) - with ops.control_dependencies(update_ops): - update_ops = [] - # Copy over unshrinked weights to user provided variables. - for i, name in enumerate( - ['sparse_features_weights', 'dense_features_weights']): - for var, slot_var in zip(self._variables[name], - self._slots['unshrinked_' + name]): - update_ops.append(var.assign(slot_var)) - - update_group = control_flow_ops.group(*update_ops) - - # Apply proximal step. 
- with ops.control_dependencies([update_group]): - shrink_ops = [] - for name in ['sparse_features_weights', 'dense_features_weights']: - for var in self._variables[name]: - with ops.device(var.device): - shrink_ops.append( - sdca_shrink_l1( - self._convert_n_to_tensor( - [var], as_ref=True), - l1=self._symmetric_l1_regularization(), - l2=self._symmetric_l2_regularization())) - shrink_l1 = control_flow_ops.group(*shrink_ops) if not global_step: - return shrink_l1 - with ops.control_dependencies([shrink_l1]): + return control_flow_ops.group(*update_ops) + with ops.control_dependencies(update_ops): return state_ops.assign_add(global_step, 1, name=name).op + def update_weights(self, train_op): + """Updates the model weights. + + This function must be called on at least one worker after `minimize`. + In distributed training this call can be omitted on non-chief workers to + speed up training. + + Args: + train_op: The operation returned by the `minimize` call. + + Returns: + An Operation that updates the model weights. + """ + with ops.control_dependencies([train_op]): + update_ops = [] + # Copy over unshrinked weights to user provided variables. + for name in ['sparse_features_weights', 'dense_features_weights']: + for var, slot_var in zip(self._variables[name], + self._slots['unshrinked_' + name]): + update_ops.append(var.assign(slot_var)) + + # Apply proximal step. + with ops.control_dependencies(update_ops): + update_ops = [] + for name in ['sparse_features_weights', 'dense_features_weights']: + for var in self._variables[name]: + with ops.device(var.device): + update_ops.append( + sdca_shrink_l1( + self._convert_n_to_tensor( + [var], as_ref=True), + l1=self._symmetric_l1_regularization(), + l2=self._symmetric_l2_regularization())) + return control_flow_ops.group(*update_ops) + def approximate_duality_gap(self): """Add operations to compute the approximate duality gap. diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py index 334acf7a041..6ff4bf31753 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py @@ -49,7 +49,7 @@ class SDCAOptimizer(object): as `key` whose value is a `Tensor` of shape [batch_size] and dtype string. num_loss_partitions defines the number of partitions of the global loss function and should be set to (#concurrent train ops/per worker) x (#workers). - Convergence of (global) loss is guranteed if num_loss_partitions is larger or + Convergence of (global) loss is guaranteed if num_loss_partitions is larger or equal to the above product. Larger values for num_loss_partitions lead to slower convergence. 
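(Editor's note.) The `sdca_ops.py` hunk above, together with the `sdca_optimizer.py` change that follows (which now returns both the model and the train op), splits the old single training op in two: `minimize()` performs only the dual updates, while the new `update_weights()` copies the unshrunk slot weights into the user-visible variables and applies the proximal L1/L2 shrink step. A minimal sketch of the resulting usage, mirroring the updated `SdcaModel` docstring and the `lr.update_weights(train_op).run()` calls added to the tests; the `examples`, `variables`, and `options` dicts are placeholders, exactly as in the docstring:

```python
import tensorflow as tf

# Sketch only: `examples`, `variables` and `options` are the usual SdcaModel
# input dicts, left abstract here just as in the updated class docstring.
lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options)

train_op = lr.minimize()                  # dual updates only
update_op = lr.update_weights(train_op)   # copy weights + L1/L2 shrink step

num_iterations = 10  # stand-in for _MAX_ITERATIONS in the tests above

with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())
  for _ in range(num_iterations):
    sess.run(train_op)
  # Per the new docstring: required on at least one (chief) worker after
  # minimize(); non-chief workers in distributed training may skip it.
  sess.run(update_op)
```

Splitting the shrink step out of `minimize()` is what allows non-chief workers to skip it in distributed training, as the new `update_weights` docstring notes.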
The recommended value for num_loss_partitions in tf.learn (where currently there is one process per worker) is the number of workers @@ -181,4 +181,5 @@ class SDCAOptimizer(object): num_loss_partitions=self._num_loss_partitions, num_table_shards=self._num_table_shards, loss_type=loss_type)) - return sdca_model.minimize(global_step=global_step) + train_op = sdca_model.minimize(global_step=global_step) + return sdca_model, train_op diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 704c9135656..023efd125d8 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -21,7 +21,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework.python.ops import add_arg_scope from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -43,9 +42,7 @@ __all__ = ["absolute_difference", "mean_squared_error", "sigmoid_cross_entropy", "softmax_cross_entropy", - "sparse_softmax_cross_entropy", - "sum_of_pairwise_squares", - "sum_of_squares"] + "sparse_softmax_cross_entropy"] def _scale_losses(losses, weight): @@ -486,8 +483,7 @@ def hinge_loss(logits, target, scope=None): return losses -@deprecated("2016-10-01", "Use mean_squared_error.") -def sum_of_squares(predictions, targets, weight=1.0, scope=None): +def mean_squared_error(predictions, targets, weight=1.0, scope=None): """Adds a Sum-of-Squares loss to the training procedure. `weight` acts as a coefficient for the loss. If a scalar is provided, then the @@ -512,7 +508,7 @@ def sum_of_squares(predictions, targets, weight=1.0, scope=None): ValueError: If the shape of `predictions` doesn't match that of `targets` or if the shape of `weight` is invalid. """ - with ops.name_scope(scope, "sum_of_squares_loss", + with ops.name_scope(scope, "mean_squared_error", [predictions, targets]) as scope: predictions.get_shape().assert_is_compatible_with(targets.get_shape()) if weight is None: @@ -523,17 +519,13 @@ def sum_of_squares(predictions, targets, weight=1.0, scope=None): return compute_weighted_loss(losses, weight) -mean_squared_error = sum_of_squares - - -@deprecated("2016-10-01", "Use mean_pairwise_squared_error.") -def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None): +def mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None): """Adds a pairwise-errors-squared loss to the training procedure. - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. + Unlike `mean_squared_error`, which is a measure of the differences between + corresponding elements of `predictions` and `targets`, + `mean_pairwise_squared_error` is a measure of the differences between pairs of + corresponding elements of `predictions` and `targets`. For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are three pairs of differences are summed to compute the loss: @@ -566,7 +558,7 @@ def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None): ValueError: If the shape of `predictions` doesn't match that of `targets` or if the shape of `weight` is invalid. 
""" - with ops.name_scope(scope, "sum_of_pairwise_squares_loss", + with ops.name_scope(scope, "mean_pairwise_squared_error", [predictions, targets]) as scope: predictions.get_shape().assert_is_compatible_with(targets.get_shape()) if weight is None: @@ -607,9 +599,6 @@ def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None): return mean_loss -mean_pairwise_squared_error = sum_of_pairwise_squares - - def cosine_distance(predictions, targets, dim, weight=1.0, scope=None): """Adds a cosine-distance loss to the training procedure. diff --git a/tensorflow/contrib/losses/python/losses/loss_ops_test.py b/tensorflow/contrib/losses/python/losses/loss_ops_test.py index 88648bad26a..363caf4f3d5 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops_test.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops_test.py @@ -779,12 +779,6 @@ class MeanSquaredErrorTest(tf.test.TestCase): self._predictions = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) self._targets = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - def testDeprecatedName(self): - loss = tf.contrib.losses.sum_of_squares( - self._predictions, self._predictions) - with self.test_session(): - self.assertAlmostEqual(0.0, loss.eval(), 3) - def testValueErrorThrownWhenWeightIsNone(self): with self.test_session(): with self.assertRaises(ValueError): @@ -875,13 +869,6 @@ class MeanPairwiseSquaresErrorTest(tf.test.TestCase): self._expected_losses = np.divide(total, 9.0) - def testDeprecatedName(self): - loss = tf.contrib.losses.sum_of_pairwise_squares( - predictions=tf.constant(self._predictions), - targets=tf.constant(self._targets)) - with self.test_session(): - self.assertAlmostEqual(np.sum(self._expected_losses), loss.eval(), 3) - def testValueErrorThrownWhenWeightIsNone(self): with self.test_session(): with self.assertRaises(ValueError): diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 59eabe2bbb3..3f1cd91a5b0 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -136,7 +136,7 @@ $(shell mkdir -p $(DEPDIR) >/dev/null) # Settings for the target compiler. CXX := $(CC_PREFIX) gcc -OPTFLAGS := -O0 +OPTFLAGS := -O2 CXXFLAGS := --std=c++11 -DIS_SLIM_BUILD -fno-exceptions -DNDEBUG $(OPTFLAGS) LDFLAGS := \ -L/usr/local/lib @@ -229,6 +229,8 @@ ifeq ($(TARGET),ANDROID) --sysroot $(NDK_ROOT)/platforms/android-21/arch-arm \ -Wno-narrowing \ -march=armv7-a \ +-mfloat-abi=softfp \ +-mfpu=neon \ -fPIE INCLUDES = \ diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh index bf36be23f7c..80cc54539c1 100755 --- a/tensorflow/contrib/makefile/build_all_android.sh +++ b/tensorflow/contrib/makefile/build_all_android.sh @@ -66,6 +66,7 @@ if [[ "${USE_HEXAGON}" == "true" ]]; then HEXAGON_INCLUDE="${HEXAGON_PARENT_DIR}/include" fi +# Recommend make -j<#jobs> e.g. 
-j8 to speed up build on multi-core machine if [[ -z "${BUILD_TARGET}" ]]; then make -f tensorflow/contrib/makefile/Makefile \ TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \ diff --git a/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py b/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py index a4469029c1e..1820f6bf17d 100644 --- a/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py +++ b/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py @@ -33,27 +33,34 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32, Calculate the Confusion Matrix for a pair of prediction and label 1-D int arrays. - Considering a prediction array such as: `[1, 2, 3]` - And a label array such as: `[2, 2, 3]` - - The confusion matrix returned would be the following one: - - ```python - [[0, 0, 0] - [0, 1, 0] - [0, 1, 0] - [0, 0, 1]] - ``` - - If `weights` is not None, then the confusion matrix elements are the - corresponding `weights` elements. - - Where the matrix rows represent the prediction labels and the columns + The matrix rows represent the prediction labels and the columns represents the real labels. The confusion matrix is always a 2-D array - of shape [n, n], where n is the number of valid labels for a given + of shape `[n, n]`, where `n` is the number of valid labels for a given classification task. Both prediction and labels must be 1-D arrays of the same shape in order for this function to work. + If `num_classes` is None, then `num_classes` will be set to the one plus + the maximum value in either predictions or labels. + Class labels are expected to start at 0. E.g., if `num_classes` was + three, then the possible labels would be `[0, 1, 2]`. + + If `weights` is not `None`, then each prediction contributes its + corresponding weight to the total value of the confusion matrix cell. + + For example: + + ```python + tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==> + [[0 0 0 0 0] + [0 0 1 0 0] + [0 0 1 0 0] + [0 0 0 0 0] + [0 0 0 0 1]] + ``` + + Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`, + resulting in a 5x5 confusion matrix. + Args: predictions: A 1-D array representing the predictions for a given classification. diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 6cbe01e6713..2359fbd5691 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -22,6 +22,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework import tensor_util from tensorflow.contrib.framework.python.ops import variables as contrib_variables @@ -113,13 +114,15 @@ def _safe_scalar_div(numerator, denominator, name): name=name) -def _create_local(name, shape=None, collections=None, dtype=dtypes.float32): +def _create_local(name, shape, collections=None, validate_shape=True, + dtype=dtypes.float32): """Creates a new local variable. Args: name: The name of the new or existing variable. shape: Shape of the new or existing variable. collections: A list of collection names to which the Variable will be added. + validate_shape: Whether to validate the shape of the variable. dtype: Data type of the variables. 
Returns: @@ -132,7 +135,8 @@ def _create_local(name, shape=None, collections=None, dtype=dtypes.float32): initial_value=array_ops.zeros(shape, dtype=dtype), name=name, trainable=False, - collections=collections) + collections=collections, + validate_shape=validate_shape) def _count_condition(values, weights=None, metrics_collections=None, @@ -1225,6 +1229,8 @@ def _at_k_name(name, k, class_id=None): return name +@deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, ' + 'and reshape labels from [batch_size] to [batch_size, 1].') @deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') def streaming_recall_at_k(predictions, labels, k, ignore_mask=None, weights=None, metrics_collections=None, @@ -1328,7 +1334,7 @@ def streaming_sparse_recall_at_k(predictions, labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`. + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. @@ -1429,7 +1435,7 @@ def streaming_sparse_precision_at_k(predictions, [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. class_id: Integer class ID for which we want binary metrics. This should be @@ -1596,7 +1602,7 @@ def sparse_average_precision_at_k(predictions, labels, k): [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. @@ -1698,7 +1704,7 @@ def streaming_sparse_average_precision_at_k(predictions, [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. @@ -1770,9 +1776,8 @@ def _select_class_id(ids, selected_id): selected_id: Int id to select. Returns: - `SparseTensor` of same dimensions as `ids`, except for the last dimension, - which might be smaller. This contains only the entries equal to - `selected_id`. + `SparseTensor` of same dimensions as `ids`. This contains only the entries + equal to `selected_id`. 
""" if isinstance(ids, (ops.SparseTensor, ops.SparseTensorValue)): return sparse_ops.sparse_retain( @@ -1782,7 +1787,7 @@ def _select_class_id(ids, selected_id): # tf.equal and tf.reduce_any? # Shape of filled IDs is the same as `ids` with the last dim collapsed to 1. - ids_shape = array_ops.shape(ids) + ids_shape = array_ops.shape(ids, out_type=dtypes.int64) ids_last_dim = array_ops.size(ids_shape) - 1 filled_selected_id_shape = math_ops.reduced_shape( ids_shape, array_ops.reshape(ids_last_dim, [1])) @@ -1790,7 +1795,9 @@ def _select_class_id(ids, selected_id): # Intersect `ids` with the selected ID. filled_selected_id = array_ops.fill( filled_selected_id_shape, math_ops.to_int64(selected_id)) - return set_ops.set_intersection(filled_selected_id, ids) + result = set_ops.set_intersection(filled_selected_id, ids) + return ops.SparseTensor( + indices=result.indices, values=result.values, shape=ids_shape) def _maybe_select_class_id(labels, predictions_idx, selected_id=None): @@ -2827,7 +2834,8 @@ def streaming_concat(values, # applied to contiguous slices init_size = 0 if max_size is None else max_size init_shape = [init_size] + fixed_shape - array = _create_local('array', shape=init_shape, dtype=values.dtype) + array = _create_local( + 'array', shape=init_shape, validate_shape=False, dtype=values.dtype) size = _create_local('size', shape=[], dtype=dtypes.int32) perm = [0 if n == axis else n + 1 if n < axis else n for n in range(ndim)] @@ -2900,6 +2908,7 @@ def aggregate_metric_map(names_to_tuples): This function is useful for pairing metric names with their associated value and update ops when the list of metrics is long. For example: + ```python metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({ 'Mean Absolute Error': new_slim.metrics.streaming_mean_absolute_error( predictions, labels, weights), @@ -2910,6 +2919,7 @@ def aggregate_metric_map(names_to_tuples): 'RMSE Log': new_slim.metrics.streaming_root_mean_squared_error( predictions, labels, weights), }) + ``` Args: names_to_tuples: a map of metric names to tuples, each of which contain the diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index efcd1de4fe6..40a68794563 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -132,6 +132,10 @@ def _binary_3d_label_to_sparse(labels): return tf.SparseTensor.from_value(_binary_3d_label_to_sparse_value(labels)) +def _assert_nan(test_case, actual): + test_case.assertTrue(math.isnan(actual), 'Expected NAN, got %s.' % actual) + + class StreamingMeanTest(tf.test.TestCase): def setUp(self): @@ -1603,6 +1607,9 @@ class StreamingPrecisionRecallThresholdsTest(tf.test.TestCase): self.assertAlmostEqual(expected_rec, rec.eval(), 2) +# TODO(ptucker): Remove when we remove `streaming_recall_at_k`. +# This op will be deprecated soon in favor of `streaming_sparse_recall_at_k`. +# Until then, this test validates that both ops yield the same results. 
class StreamingRecallAtKTest(tf.test.TestCase): def setUp(self): @@ -1639,57 +1646,78 @@ class StreamingRecallAtKTest(tf.test.TestCase): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=1) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=1) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(0.25, sess.run(update_op)) self.assertEqual(0.25, recall.eval()) + self.assertEqual(0.25, sess.run(sp_update_op)) + self.assertEqual(0.25, sp_recall.eval()) def testSingleUpdateKIs2(self): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=2) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=2) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(0.5, sess.run(update_op)) self.assertEqual(0.5, recall.eval()) + self.assertEqual(0.5, sess.run(sp_update_op)) + self.assertEqual(0.5, sp_recall.eval()) def testSingleUpdateKIs3(self): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=3) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=3) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(1.0, sess.run(update_op)) self.assertEqual(1.0, recall.eval()) + self.assertEqual(1.0, sess.run(sp_update_op)) + self.assertEqual(1.0, sp_recall.eval()) def testSingleUpdateSomeMissingKIs2(self): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) weights = tf.constant([0, 1, 1, 1], shape=(self._batch_size,), dtype=tf.float32) mask = tf.constant([False, False, True, False], shape=(self._batch_size,), dtype=tf.bool) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=2, ignore_mask=mask, weights=weights) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=2, + ignore_mask=mask, weights=weights) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(1.0, sess.run(update_op)) self.assertEqual(1.0, recall.eval()) + self.assertEqual(1.0, sess.run(sp_update_op)) + self.assertEqual(1.0, sp_recall.eval()) class StreamingSparsePrecisionTest(tf.test.TestCase): @@ -1718,8 +1746,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): # Run per-step op and assert expected values. 
if math.isnan(expected): - self.assertTrue(math.isnan(update.eval())) - self.assertTrue(math.isnan(metric.eval())) + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) else: self.assertEqual(expected, update.eval()) self.assertEqual(expected, metric.eval()) @@ -1735,7 +1763,7 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): ignore_mask = tf.constant(ignore_mask, tf.bool) predictions = tf.constant(predictions, tf.float32) metric = metric_ops.sparse_average_precision_at_k( - predictions=predictions, labels=labels, k=k) + predictions, labels, k) self.assertAllEqual(expected, metric.eval()) def _test_streaming_sparse_average_precision_at_k( @@ -1745,7 +1773,7 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): weights = tf.constant(weights, tf.float32) predictions = tf.constant(predictions, tf.float32) metric, update = metrics.streaming_sparse_average_precision_at_k( - predictions=predictions, labels=labels, k=k, weights=weights) + predictions, labels, k, weights=weights) # Fails without initialized vars. self.assertRaises(tf.OpError, metric.eval) @@ -1755,8 +1783,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): # Run per-step op and assert expected values. if math.isnan(expected): - self.assertTrue(math.isnan(update.eval())) - self.assertTrue(math.isnan(metric.eval())) + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) else: self.assertAlmostEqual(expected, update.eval()) self.assertAlmostEqual(expected, metric.eval()) @@ -1849,89 +1877,97 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): predictions, labels, k, expected=streaming_average_precision[i], weights=weights) - def test_one_label_at_k1_no_predictions(self): + def test_one_label_at_k1_nan(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Classes 0,1,2 have 0 predictions, class 4 is out of range. - for class_id in [0, 1, 2, 4]: - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=1, expected=NAN, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,1,2 have 0 predictions, class 4 is out of range. + for class_id in (0, 1, 2, 4): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=NAN, class_id=class_id) def test_one_label_at_k1(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 3: 1 label, 2 predictions, 1 correct. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=1, expected=1.0 / 2, class_id=3) + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=1.0 / 2, class_id=3) - # All classes: 2 labels, 2 predictions, 1 correct. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=1, expected=1.0 / 2) + # All classes: 2 labels, 2 predictions, 1 correct. 
+ self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=1.0 / 2) def test_three_labels_at_k5_no_predictions(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] ] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] - ] - sp_labels = _binary_2d_label_to_sparse_value(labels) + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Classes 1,3,8 have 0 predictions, class 10 is out of range. - for class_id in [1, 3, 8, 10]: - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 1,3,8 have 0 predictions, class 10 is out of range. + for class_id in (1, 3, 8, 10): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_three_labels_at_k5_no_labels(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] ] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] - ] - sp_labels = _binary_2d_label_to_sparse_value(labels) + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Classes 0,4,6,9: 0 labels, >=1 prediction. - for class_id in [0, 4, 6, 9]: - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,4,6,9: 0 labels, >=1 prediction. + for class_id in (0, 4, 6, 9): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) def test_three_labels_at_k5(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] ] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] - ] - sp_labels = _binary_2d_label_to_sparse_value(labels) + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Class 2: 2 labels, 2 correct predictions. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2, class_id=2) + for labels in (sparse_labels, dense_labels): + # Class 2: 2 labels, 2 correct predictions. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=2.0 / 2, + class_id=2) - # Class 5: 1 label, 1 correct prediction. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 1, class_id=5) + # Class 5: 1 label, 1 correct prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=1.0 / 1, class_id=5) - # Class 7: 1 label, 1 incorrect prediction. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0 / 1, class_id=7) + # Class 7: 1 label, 1 incorrect prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=7) - # All classes: 10 predictions, 3 correct. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=3.0 / 10) + # All classes: 10 predictions, 3 correct. 
+ self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=3.0 / 10) - def test_3d_no_predictions(self): + def test_3d_nan(self): predictions = [[ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] @@ -1939,19 +1975,18 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Classes 1,3,8 have 0 predictions, class 10 is out of range. - for class_id in [1, 3, 8, 10]: + for class_id in (1, 3, 8, 10): self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id) + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_3d_no_labels(self): predictions = [[ @@ -1961,19 +1996,18 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Classes 0,4,6,9: 0 labels, >=1 prediction. - for class_id in [0, 4, 6, 9]: + for class_id in (0, 4, 6, 9): self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0, class_id=class_id) + predictions, labels, k=5, expected=0.0, class_id=class_id) def test_3d(self): predictions = [[ @@ -1983,30 +2017,29 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 4 predictions, all correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=4.0 / 4, class_id=2) + predictions, labels, k=5, expected=4.0 / 4, class_id=2) # Class 5: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2, class_id=5) + predictions, labels, k=5, expected=2.0 / 2, class_id=5) # Class 7: 2 predictions, 1 correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2, class_id=7) + predictions, labels, k=5, expected=1.0 / 2, class_id=7) # All classes: 20 predictions, 7 correct. 
self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=7.0 / 20) + predictions, labels, k=5, expected=7.0 / 20) def test_3d_ignore_all(self): predictions = [[ @@ -2016,28 +2049,26 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) for class_id in xrange(10): self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0], [0]]) self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, - ignore_mask=[[False], [True]], weights=[[0], [1]]) + predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]], + weights=[[0], [1]]) self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, - weights=[[0, 0], [0, 0]]) + predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) def test_3d_ignore_some(self): predictions = [[ @@ -2047,43 +2078,42 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[1], [0]]) # Class 2: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[0], [1]]) # Class 7: 1 incorrect prediction. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, ignore_mask=[[False], [True]], weights=[[1], [1]]) # Class 7: 1 correct prediction. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, ignore_mask=[[True], [False]], weights=[[1], [1]]) # Class 7: no predictions. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=7, + predictions, labels, k=5, expected=NAN, class_id=7, weights=[[1, 0], [0, 1]]) # Class 7: 2 predictions, 1 correct. 
self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 2.0, class_id=7, weights=[[0, 1], [1, 0]]) def test_sparse_tensor_value(self): @@ -2127,177 +2157,172 @@ class StreamingSparseRecallTest(tf.test.TestCase): # Run per-step op and assert expected values. if math.isnan(expected): - self.assertTrue(math.isnan(update.eval())) - self.assertTrue(math.isnan(metric.eval())) + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) else: self.assertEqual(expected, update.eval()) self.assertEqual(expected, metric.eval()) - def test_one_label_at_k1_empty_classes(self): + def test_one_label_at_k1_nan(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) # Classes 0,1 have 0 labels, 0 predictions, class 4 is out of range. - for class_id in [0, 1, 4]: - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=class_id) + for labels in (sparse_labels, dense_labels): + for class_id in (0, 1, 4): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, + class_id=class_id) def test_one_label_at_k1_no_predictions(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 2: 0 predictions. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.0, - class_id=2) + for labels in (sparse_labels, dense_labels): + # Class 2: 0 predictions. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.0, + class_id=2) def test_one_label_at_k1(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 3: 1 label, 2 predictions, 1 correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3) + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, + class_id=3) - # All classes: 2 labels, 2 predictions, 1 correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2) + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2) def test_one_label_at_k1_weighted(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 3: 1 label, 2 predictions, 1 correct. 
- self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=3, weights=(0.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(1.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(2.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=3, weights=(0.0, 0.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=3, weights=(0.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(1.0, 0.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(1.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=2.0 / 2, - class_id=3, weights=(2.0, 3.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=3.0 / 3, - class_id=3, weights=(3.0, 2.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.3 / 0.3, - class_id=3, weights=(0.3, 0.6)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.6 / 0.6, - class_id=3, weights=(0.6, 0.3)) + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, weights=(0.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(2.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, + weights=(0.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, + weights=(0.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=2.0 / 2, class_id=3, + weights=(2.0, 3.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=3.0 / 3, class_id=3, + weights=(3.0, 2.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.3 / 0.3, class_id=3, + weights=(0.3, 0.6)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.6 / 0.6, class_id=3, + weights=(0.6, 0.3)) - # All classes: 2 labels, 2 predictions, 1 correct. 
- self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - weights=(0.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2, - weights=(1.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2, - weights=(2.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - weights=(1.0, 0.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.0 / 1, - weights=(0.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2, - weights=(1.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=2.0 / 5, - weights=(2.0, 3.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=3.0 / 5, - weights=(3.0, 2.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.3 / 0.9, - weights=(0.3, 0.6)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.6 / 0.9, - weights=(0.6, 0.3)) + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, weights=(0.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(1.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(2.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, weights=(1.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.0 / 1, weights=(0.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(1.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=2.0 / 5, weights=(2.0, 3.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=3.0 / 5, weights=(3.0, 2.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.3 / 0.9, weights=(0.3, 0.6)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.6 / 0.9, weights=(0.6, 0.3)) - def test_three_labels_at_k5_no_labels(self): + def test_three_labels_at_k5_nan(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. - for class_id in [0, 3, 4, 6, 9, 10]: - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=NAN, - class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. 
+ for class_id in (0, 3, 4, 6, 9, 10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_three_labels_at_k5_no_predictions(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Class 8: 1 label, no predictions. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1, - class_id=8) + for labels in (sparse_labels, dense_labels): + # Class 8: 1 label, no predictions. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=8) def test_three_labels_at_k5(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Class 2: 2 labels, both correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=2.0 / 2, - class_id=2) + for labels in (sparse_labels, dense_labels): + # Class 2: 2 labels, both correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=2.0 / 2, class_id=2) - # Class 5: 1 label, incorrect. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=1.0 / 1, - class_id=5) + # Class 5: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=1.0 / 1, class_id=5) - # Class 7: 1 label, incorrect. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1, - class_id=7) + # Class 7: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=7) - # All classes: 6 labels, 3 correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=3.0 / 6) + # All classes: 6 labels, 3 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=3.0 / 6) - def test_3d_no_labels(self): + def test_3d_nan(self): predictions = [[ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] @@ -2305,19 +2330,26 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + sparse_labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) + dense_labels = np.array([[ + [2, 7, 8], + [1, 2, 5] + ], [ + [1, 2, 5], + [2, 7, 8], + ]], dtype=np.int64) - # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. 
- for class_id in [0, 3, 4, 6, 9, 10]: - self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. + for class_id in (0, 3, 4, 6, 9, 10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_3d_no_predictions(self): predictions = [[ @@ -2327,19 +2359,26 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + sparse_labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) + dense_labels = np.array([[ + [2, 7, 8], + [1, 2, 5] + ], [ + [1, 2, 5], + [2, 7, 8], + ]], dtype=np.int64) - # Classes 1,8 have 0 predictions, >=1 label. - for class_id in [1, 8]: - self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=0.0, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 1,8 have 0 predictions, >=1 label. + for class_id in (1, 8): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) def test_3d(self): predictions = [[ @@ -2349,30 +2388,29 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 4 labels, all correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=4.0 / 4, class_id=2) + predictions, labels, k=5, expected=4.0 / 4, class_id=2) # Class 5: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2, class_id=5) + predictions, labels, k=5, expected=2.0 / 2, class_id=5) # Class 7: 2 labels, 1 incorrect. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2, class_id=7) + predictions, labels, k=5, expected=1.0 / 2, class_id=7) # All classes: 12 labels, 7 correct. 
self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=7.0 / 12) + predictions, labels, k=5, expected=7.0 / 12) def test_3d_ignore_all(self): predictions = [[ @@ -2382,27 +2420,26 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) for class_id in xrange(10): self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0], [0]]) self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, - ignore_mask=[[False], [True]], weights=[[0], [1]]) + predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]], + weights=[[0], [1]]) self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) + predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) def test_3d_ignore_some(self): predictions = [[ @@ -2412,43 +2449,42 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[1], [0]]) # Class 2: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[0], [1]]) # Class 7: 1 label, correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=1.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, ignore_mask=[[True], [False]], weights=[[1], [1]]) # Class 7: 1 label, incorrect. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=0.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, ignore_mask=[[False], [True]], weights=[[1], [1]]) # Class 7: 2 labels, 1 correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 2.0, class_id=7, weights=[[1, 0], [1, 0]]) # Class 7: No labels. 
self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=7, + predictions, labels, k=5, expected=NAN, class_id=7, weights=[[0, 1], [0, 1]]) def test_sparse_tensor_value(self): @@ -3678,6 +3714,22 @@ class StreamingConcatTest(tf.test.TestCase): with self.assertRaises(ValueError): metrics.streaming_concat(tf.placeholder(tf.float32, [None, None])) + def testStreamingConcatReset(self): + with self.test_session() as sess: + values = tf.placeholder(tf.int32, [None]) + concatenated, update_op = metrics.streaming_concat(values) + sess.run(tf.initialize_local_variables()) + + self.assertAllEqual([], concatenated.eval()) + + sess.run([update_op], feed_dict={values: [0, 1, 2]}) + self.assertAllEqual([0, 1, 2], concatenated.eval()) + + sess.run(tf.initialize_local_variables()) + + sess.run([update_op], feed_dict={values: [3, 4]}) + self.assertAllEqual([3, 4], concatenated.eval()) + class AggregateMetricsTest(tf.test.TestCase): @@ -3928,7 +3980,8 @@ class ExpandAndTileTest(tf.test.TestCase): indices=[[0, i[0], i[1]] for i in x.indices], values=x.values, shape=[1, 3, 3]) self._assert_sparse_tensors_equal( - expected_result_dim0, metric_ops.expand_and_tile(x, multiple=1).eval()) + expected_result_dim0, + metric_ops.expand_and_tile(x, multiple=1).eval()) for dim in (-2, 0): self._assert_sparse_tensors_equal( expected_result_dim0, diff --git a/tensorflow/contrib/quantization/kernels/hexagon/BUILD b/tensorflow/contrib/quantization/kernels/hexagon/BUILD index b57a2ac1b59..df0dbf94ce3 100644 --- a/tensorflow/contrib/quantization/kernels/hexagon/BUILD +++ b/tensorflow/contrib/quantization/kernels/hexagon/BUILD @@ -11,6 +11,7 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", "tf_cc_test", + "tf_kernel_library", ) filegroup( @@ -43,3 +44,36 @@ tf_cc_test( "//tensorflow/core/kernels:ops_util", ], ) + +tf_cc_test( + name = "graph_transferer_test", + size = "small", + srcs = ["graph_transferer_test.cc"], + deps = [ + "//tensorflow/cc:cc_ops", + "//tensorflow/contrib/quantization/kernels/hexagon:graph_transferer", + "//tensorflow/core:core_cpu", + "//tensorflow/core:direct_session", + "//tensorflow/core:lib", + "//tensorflow/core:ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + +tf_kernel_library( + name = "graph_transferer", + srcs = [ + "graph_transferer.cc", + ], + hdrs = [ + "graph_transferer.h", + ], + deps = [ + "//tensorflow/core", + "//tensorflow/core:framework", + "//third_party/eigen3", + ], +) diff --git a/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.cc b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.cc new file mode 100644 index 00000000000..2bcb6ac652c --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.cc @@ -0,0 +1,23 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h" + +namespace tensorflow { +void GraphTransferer::LoadGraphFromProto( + ::tensorflow::protobuf::MessageLite* proto) { + // TODO(satok): implement +} +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h new file mode 100644 index 00000000000..5d83283c1b9 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h @@ -0,0 +1,40 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H_ + +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { + +// GraphTransferer transfers graph definitions into SoC memory. +// This functionality is effective only if the SoC is capable of running +// the graph on that chip. +// TODO(satok): support transferring subgraphs to be able to split graphs +// to avoid unsupported ops in SoC. +class GraphTransferer { + public: + GraphTransferer() = default; + void LoadGraphFromProto(::tensorflow::protobuf::MessageLite* proto); + + private: + TF_DISALLOW_COPY_AND_ASSIGN(GraphTransferer); +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H_ diff --git a/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer_test.cc new file mode 100644 index 00000000000..21d53816559 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer_test.cc @@ -0,0 +1,57 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { + +class GraphTransfererTest : public ::testing::Test { + protected: + void SetUp() final { + SessionOptions session_options; + session_options.env = Env::Default(); + _session = std::unique_ptr<Session>(NewSession(session_options)); + } + + std::unique_ptr<Session> _session; +}; + +static GraphDef CreateSmallGraphDef() { + Scope root = Scope::NewRootScope(); + ops::Output node_a = ops::Const(root.WithOpName("a"), 1); + ops::Output node_b = ops::Const(root.WithOpName("b"), 2); + ops::Add(root.WithOpName("a_plus_b"), node_a, node_b); + + GraphDef def; + TF_CHECK_OK(root.ToGraphDef(&def)); + return def; +} + +TEST_F(GraphTransfererTest, LoadGraph) { + GraphDef def = CreateSmallGraphDef(); + _session->Create(def); + + GraphTransferer gt; + gt.LoadGraphFromProto(&def); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc index 3d139fbe0a0..f5b7f482e27 100644 --- a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc +++ b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #ifdef USE_HEXAGON_LIBS -#include "tensorflow/core/platform/hexagon/gemm_wrapper.h" +#include "tensorflow/core/platform/hexagon/soc_interface.h" #include "tensorflow/core/platform/profile_utils/cpu_utils.h" #endif @@ -42,9 +42,9 @@ class QuantizedMatMulOpForHexagonTest : public OpsTestBase { #ifdef USE_HEXAGON_LIBS profile_utils::CpuUtils::EnableClockCycleProfiling(true); LOG(INFO) << "Hexagon libs are linked (wrapper version = " - << hexagon_gemm_wrapper_GetWrapperVersion() + << soc_interface_GetWrapperVersion() << ", hexagon binary version = " - << hexagon_gemm_wrapper_GetHexagonBinaryVersion() << ")"; + << soc_interface_GetHexagonBinaryVersion() << ")"; LOG(INFO) << "Cpu frequency = " << profile_utils::CpuUtils::GetCycleCounterFrequency(); #else @@ -58,15 +58,14 @@ class QuantizedMatMulOpForHexagonTest : public OpsTestBase { TEST_F(QuantizedMatMulOpForHexagonTest, EvaluateSharedLibOverhead) { const uint64 overhead_shared_lib_start = profile_utils::CpuUtils::GetCurrentClockCycle(); - const int wrapper_version = hexagon_gemm_wrapper_GetWrapperVersion(); + const int wrapper_version = soc_interface_GetWrapperVersion(); const uint64 overhead_shared_lib_end = profile_utils::CpuUtils::GetCurrentClockCycle(); const uint64 overhead_shared_lib_diff = (overhead_shared_lib_end - overhead_shared_lib_start); const uint64 overhead_hexagon_rpc_start = profile_utils::CpuUtils::GetCurrentClockCycle(); - const int hexagon_binary_version = - hexagon_gemm_wrapper_GetHexagonBinaryVersion(); + const int hexagon_binary_version = soc_interface_GetHexagonBinaryVersion(); const uint64 overhead_hexagon_rpc_end = profile_utils::CpuUtils::GetCurrentClockCycle(); const uint64 overhead_hexagon_rpc_diff = diff --git
a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index 5fd9653b50b..30134016845 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -359,7 +359,7 @@ class LSTMBlockCellTest(tf.test.TestCase): inp = tf.convert_to_tensor( np.random.randn(batch_size, input_size), dtype=tf.float32) inputs.append(inp) - seq_lengths = [3, 4, 5] + seq_lengths = tf.constant([3, 4, 5]) initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890213) with tf.variable_scope("basic", initializer=initializer): @@ -400,7 +400,7 @@ class LSTMBlockCellTest(tf.test.TestCase): outputs = [] state = None for i, inp in enumerate(inputs): - lengths = [int(i < l) for l in seq_lengths] + lengths = [int(i < l) for l in seq_lengths.eval()] output, state = cell( [inp], initial_state=state, diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 329016e71e0..2ca5c039e27 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -532,7 +532,7 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell): dtype = initial_state[0].dtype # create the actual cell - if sequence_length: + if sequence_length is not None: sequence_length = ops.convert_to_tensor(sequence_length) initial_cell_state, initial_output = initial_state # pylint: disable=unpacking-non-sequence cell_states, outputs = self._call_cell(inputs, initial_cell_state, diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index fc0a02429d8..ce4f2c8780c 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -106,20 +106,12 @@ filegroup( cc_library( name = "session_bundle", - srcs = ["session_bundle.cc"], hdrs = ["session_bundle.h"], - copts = if_ios(["-DGOOGLE_LOGGING"]), visibility = ["//visibility:public"], deps = [ + ":session_bundle_lite", ":signature", - ] + if_not_mobile([ - ":manifest_proto_cc", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - # mobile not supported yet - ]), + ], ) # This is a lite version of the session_bundle target that does not link in any @@ -139,6 +131,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/util/tensor_bundle:naming", # mobile not supported yet ]), ) @@ -198,21 +191,6 @@ py_test( ], ) -cc_library( - name = "signature", - srcs = ["signature.cc"], - hdrs = ["signature.h"], - visibility = ["//visibility:public"], - deps = if_not_mobile([ - ":manifest_proto_cc", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:tensorflow_opensource", - # mobile not supported yet - ]), -) - # This is a lite version of the signature target that does not link in any # Tensorflow ops in order to minimize its size. Clients using this should # link any required ops manually. 
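For context on the lstm_ops.py hunk above: the guard changes from `if sequence_length:` to `if sequence_length is not None:` because the argument may now arrive as a `Tensor` (the updated test passes `tf.constant([3, 4, 5])`), and truth-testing a `Tensor` is ambiguous (recent TensorFlow builds raise a TypeError), while an empty Python list would make the old check silently skip the conversion. A minimal sketch of the distinction, using only the TF 0.x-style Python API already seen in this diff; the helper name below is illustrative and not part of the patch:

import tensorflow as tf

def maybe_convert(sequence_length=None):
  # `is not None` distinguishes "argument omitted" from "argument supplied".
  # Truth-testing the value instead would be ambiguous for a Tensor argument
  # and would wrongly skip the conversion for an empty list.
  if sequence_length is not None:
    sequence_length = tf.convert_to_tensor(sequence_length)
  return sequence_length

maybe_convert()                        # None is returned; the branch is skipped.
maybe_convert([3, 4, 5])               # Python list is converted to a Tensor.
maybe_convert(tf.constant([3, 4, 5]))  # Tensor is passed through convert_to_tensor.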
@@ -230,6 +208,17 @@ cc_library( ]), ) +cc_library( + name = "signature", + hdrs = ["signature.h"], + visibility = ["//visibility:public"], + deps = [ + ":signature_lite", + ] + if_not_mobile([ + "//tensorflow/core:tensorflow_opensource", + ]), +) + cc_test( name = "signature_test", size = "small", diff --git a/tensorflow/contrib/session_bundle/session_bundle.cc b/tensorflow/contrib/session_bundle/session_bundle.cc index 7e0242c9981..577370dc471 100644 --- a/tensorflow/contrib/session_bundle/session_bundle.cc +++ b/tensorflow/contrib/session_bundle/session_bundle.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/protobuf/meta_graph.pb.h" #include "tensorflow/core/protobuf/saver.pb.h" #include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/util/tensor_bundle/naming.h" namespace tensorflow { namespace serving { @@ -85,7 +86,7 @@ void AddAssetsTensorsToInputs(const StringPiece export_dir, io::JoinPath(export_dir, kAssetsDirectory, asset.filename())); inputs->push_back( {asset.tensor_binding().tensor_name(), assets_file_tensor}); - } + } } // Historically, model exporter(exporter.py) takes only saver with @@ -104,7 +105,7 @@ void AddAssetsTensorsToInputs(const StringPiece export_dir, // distributed among the export.index and export.data-?????-of-????? files. string GetVariablesFilename(const StringPiece export_dir) { const char kVariablesFilename[] = "export"; - const char kVariablesIndexFilename[] = "export.index"; // V2 ckpts + const string kVariablesIndexFilename = MetaFilename("export"); // V2 ckpts const char kVariablesFilenamePattern[] = "export-\?\?\?\?\?-of-\?\?\?\?\?"; if (Env::Default()->FileExists( io::JoinPath(export_dir, kVariablesFilename)) || @@ -158,8 +159,7 @@ Status LoadSessionBundleFromPathUsingRunOptionsInternal( // Use serving graph_def in MetaGraphDef collection_def. if (graph_collection_def.any_list().value_size() != 1) { return errors::FailedPrecondition( - "Expected exactly one serving GraphDef in : ", - DebugStringIfAvailable(bundle->meta_graph_def)); + "Expected exactly one serving GraphDef in : ", export_dir); } const auto& any = graph_collection_def.any_list().value(0); GraphDef graph_def; @@ -194,9 +194,8 @@ Status LoadSessionBundleFromPathUsingRunOptionsInternal( const auto init_op_it = collection_def_map.find(kInitOpKey); if (init_op_it != collection_def_map.end()) { if (init_op_it->second.node_list().value_size() != 1) { - return errors::FailedPrecondition( - strings::StrCat("Expected exactly one serving init op in : ", - DebugStringIfAvailable(bundle->meta_graph_def))); + return errors::FailedPrecondition(strings::StrCat( + "Expected exactly one serving init op in : ", export_dir)); } TF_RETURN_IF_ERROR(RunInitOp(run_options, export_dir, asset_files, init_op_it->second.node_list().value(0), diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index 9f702bfdea0..5c28a265381 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -379,7 +379,8 @@ def evaluation_loop(master, variables_to_restore=None, eval_interval_secs=60, max_number_of_evaluations=None, - session_config=None): + session_config=None, + timeout=None): """Runs TF-Slim's Evaluation Loop. Args: @@ -406,6 +407,8 @@ def evaluation_loop(master, If the value is left as 'None', the evaluation continues indefinitely. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. 
If left as `None`, the default will be used. + timeout: The maximum amount of time to wait between checkpoints. If left as + `None`, then the process will wait indefinitely. Returns: The value of `final_op` or `None` if `final_op` is `None`. @@ -429,7 +432,8 @@ def evaluation_loop(master, number_of_evaluations = 0 for checkpoint_path in checkpoints_iterator(checkpoint_dir, - eval_interval_secs): + eval_interval_secs, + timeout): logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())) @@ -457,7 +461,9 @@ number_of_evaluations >= max_number_of_evaluations): logging.info('Reached max_number_of_evaluations=%s. Exit', max_number_of_evaluations) - break + return final_op_value + logging.info( + 'Timed-out waiting for new checkpoint file. Exiting evaluation loop.') return final_op_value diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index f78de7ad658..d72a0296ec8 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -255,6 +255,36 @@ class EvaluationTest(tf.test.TestCase): '/non-existent-dir', timeout=0)) self.assertEqual(ret, []) + def testEvaluationLoopTimeout(self): + _, update_op = slim.metrics.streaming_accuracy( + self._predictions, self._labels) + init_op = tf.group(tf.initialize_all_variables(), + tf.initialize_local_variables()) + + # Create checkpoint and log directories. + chkpt_dir = os.path.join(self.get_temp_dir(), 'tmp_logs/') + gfile.MakeDirs(chkpt_dir) + logdir = os.path.join(self.get_temp_dir(), 'tmp_logs2/') + gfile.MakeDirs(logdir) + + # Save initialized variables to checkpoint directory. + saver = tf.train.Saver() + with self.test_session() as sess: + init_op.run() + saver.save(sess, os.path.join(chkpt_dir, 'chkpt')) + + # Run the evaluation loop with a timeout. + with self.test_session() as sess: + start = time.time() + slim.evaluation.evaluation_loop( + '', chkpt_dir, logdir, eval_op=update_op, + eval_interval_secs=2.0, timeout=6.0) + end = time.time() + # Check we've waited for the timeout. + self.assertGreater(end - start, 6.0) + # Then the timeout kicked in and stopped the loop. + self.assertLess(end - start, 7.5) + class SingleEvaluationTest(tf.test.TestCase): diff --git a/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc b/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc index 4a81d6d6c67..77d7f4290d0 100644 --- a/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc +++ b/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc @@ -185,10 +185,6 @@ class TreePredictions : public OpKernel { }; } else { num_data = static_cast<int32>(input_data.shape().dim_size(0)); - int32 num_features = 0; - if (num_data > 0) { - num_features = input_data.NumElements() / num_data; - } decide_function = [&input_data]( int32 i, int32 feature, float bias, DataColumnTypes type) { const auto input_matrix = input_data.matrix<float>(); diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py index fc0e324bcf7..1290854260c 100644 --- a/tensorflow/contrib/training/__init__.py +++ b/tensorflow/contrib/training/__init__.py @@ -30,10 +30,12 @@ like to store state in the forward direction across segments of an example. ## Online data resampling To resample data with replacement on a per-example basis, use -['resample_at_rate'](#resample_at_rate), providing the desired rate -for each example.
If you wish to specify relative rates, rather than -absolute ones, use ['weighted_resample'](#weighted_resample) (which -also returns the actual resampling rate used for each output example). +['rejection_sample'](#rejection_sample) or +['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide +a boolean Tensor describing whether to accept or reject. For `resample_at_rate`, +providing the desired rate for each example. If you wish to specify relative +rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample) +(which also returns the actual resampling rate used for each output example). Use ['stratified_sample'](#stratified_sample) or ['stratified_sample_unknown_dist'](#stratified_sample_unknown_dist) to @@ -43,6 +45,7 @@ have a binary classification dataset that is 99.9% class 1, a common approach is to resample from the data so that the data is more balanced. +@@rejection_sample @@resample_at_rate @@stratified_sample @@stratified_sample_unknown_dist diff --git a/tensorflow/contrib/training/python/training/sampling_ops.py b/tensorflow/contrib/training/python/training/sampling_ops.py index c703e22e24d..05f5ec6b39c 100644 --- a/tensorflow/contrib/training/python/training/sampling_ops.py +++ b/tensorflow/contrib/training/python/training/sampling_ops.py @@ -27,14 +27,96 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner -__all__ = ['stratified_sample', + +__all__ = ['rejection_sample', + 'stratified_sample', 'stratified_sample_unknown_dist',] +def rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1, + enqueue_many=False, prebatch_capacity=16, + prebatch_threads=1, runtime_checks=False, name=None): + """Stochastically creates batches by rejection sampling. + + Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce + a scalar tensor between 0 and 1. This tensor corresponds to the probability of + being accepted. When `batch_size` tensor groups have been accepted, the batch + queue will return a mini-batch. + + Args: + tensors: List of tensors for data. All tensors are either one item or a + batch, according to enqueue_many. + accept_prob_fn: A python lambda that takes a non-batch tensor from each + item in `tensors`, and produces a scalar tensor. + batch_size: Size of batch to be returned. + queue_threads: The number of threads for the queue that will hold the final + batch. + enqueue_many: Bool. If true, interpret input tensors as having a batch + dimension. + prebatch_capacity: Capacity for the large queue that is used to convert + batched tensors to single examples. + prebatch_threads: Number of threads for the large queue that is used to + convert batched tensors to single examples. + runtime_checks: Bool. If true, insert runtime checks on the output of + `accept_prob_fn`. Using `True` might have a performance impact. + name: Optional prefix for ops created by this function. + Raises: + ValueError: enqueue_many is True and labels doesn't have a batch + dimension, or if enqueue_many is False and labels isn't a scalar. + ValueError: enqueue_many is True, and batch dimension on data and labels + don't match. 
+ ValueError: if a zero initial probability class has a nonzero target + probability. + Returns: + A list of tensors of the same length as `tensors`, with batch dimension + `batch_size`. + + Example: + # Get tensor for a single data and label example. + data, label = data_provider.Get(['data', 'label']) + + # Get stratified batch according to data tensor. + accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2 + data_batch = tf.contrib.training.rejection_sample( + [data, label], accept_prob_fn, 16) + + # Run batch through network. + ... + """ + with variable_scope.variable_scope(name, 'rejection_sample', tensors): + tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors) + # Reduce the case of a batched example to that of a batch of a single + # example by taking a batch of size one. + if enqueue_many: + # Validate that batch dimension of the input is consistent. + tensor_list = _verify_data_inputs(tensor_list) + + # Make a single queue to hold input examples. Reshape output so examples + # don't have singleton batch dimension. + batched = input_ops.batch(tensor_list, + batch_size=1, + num_threads=prebatch_threads, + capacity=prebatch_capacity, + enqueue_many=True) + tensor_list = [array_ops.squeeze(x, [0]) for x in batched] + + # Set up a queue containing batches that have the distribution. + cur_prob = accept_prob_fn(tensor_list) + if runtime_checks: + cur_prob = array_ops.identity(control_flow_ops.with_dependencies( + [check_ops.assert_less_equal(0.0, cur_prob), + check_ops.assert_less_equal(cur_prob, 1.0)], + cur_prob), name='prob_with_checks') + keep_input = random_ops.random_uniform([]) < cur_prob + return _conditional_batch( + tensor_list, keep_input, batch_size, num_threads=queue_threads) + + def stratified_sample(tensors, labels, target_probs, batch_size, init_probs=None, enqueue_many=False, queue_capacity=16, threads_per_queue=1, name=None): @@ -145,8 +227,12 @@ def stratified_sample(tensors, labels, target_probs, batch_size, # Set up second queue containing batches that have the desired class # proportions. cur_prob = array_ops.gather(accept_probs, label) + keep_input = random_ops.random_uniform([]) < cur_prob batched = _conditional_batch( - val_list + [label], cur_prob, batch_size, threads_per_queue) + val_list + [label], + keep_input, + batch_size, + num_threads=threads_per_queue) return batched[:-1], batched[-1] @@ -260,6 +346,18 @@ def _estimate_data_distribution(labels, num_classes, smoothing_constant=10): return math_ops.cast(init_prob_estimate, dtypes.float32) +def _verify_data_inputs(tensor_list): + """Verify that batched data inputs are well-formed.""" + for tensor in tensor_list: + # Data tensor should have a batch dimension. + tensor_shape = tensor.get_shape().with_rank_at_least(1) + + # Data batch dimensions must be compatible. + tensor_shape[0].assert_is_compatible_with(tensor_list[0].get_shape()[0]) + + return tensor_list + + def _verify_input(tensor_list, labels, probs_list): """Verify that batched inputs are well-formed.""" checked_probs_list = [] @@ -374,16 +472,16 @@ def _calculate_acceptance_probabilities(init_probs, target_probs): return ratio_l / max_ratio -def _conditional_batch(tensors, accept_prob, batch_size, queue_threads=10): +def _conditional_batch(tensors, keep_input, batch_size, num_threads=10): """Conditionally enqueue tensors based on accept_prob. Specifically, enqueue the element if accept_prob > rand_unif([0, 1]). Args: tensors: List of tensors to enqueue. - accept_prob: Acceptance probability per example. + keep_input: Bool. 
Whether to enqueue or not. batch_size: Size of batch. - queue_threads: Number of threads enqueuing in the final queue. + num_threads: Number of enqueueing threads. Returns: List of batched tensors. @@ -391,7 +489,7 @@ def _conditional_batch(tensors, accept_prob, batch_size, queue_threads=10): Raises: ValueError: `accept_prob` isn't 0D. """ - accept_prob.get_shape().assert_has_rank(0) + keep_input.get_shape().assert_has_rank(0) # Determine shapes and types of to-be-enqueued-tensors. shapes_list = [] dtypes_list = [] @@ -409,13 +507,12 @@ def _conditional_batch(tensors, accept_prob, batch_size, queue_threads=10): # Conditionally enqueue. # Reshape enqueue op to match no_op's shape. - eq_tf = math_ops.less(random_ops.random_uniform([]), accept_prob) conditional_enqueue = control_flow_ops.cond( - eq_tf, + keep_input, lambda: final_q.enqueue(tensors), control_flow_ops.no_op) queue_runner.add_queue_runner(queue_runner.QueueRunner( - final_q, [conditional_enqueue] * queue_threads)) + final_q, [conditional_enqueue] * num_threads)) out_tensor = final_q.dequeue_many(batch_size) # Queues return a single tensor if the list of enqued tensors is one. Since we diff --git a/tensorflow/contrib/training/python/training/sampling_ops_test.py b/tensorflow/contrib/training/python/training/sampling_ops_test.py index 2d663d7954f..bbc0a284cd1 100644 --- a/tensorflow/contrib/training/python/training/sampling_ops_test.py +++ b/tensorflow/contrib/training/python/training/sampling_ops_test.py @@ -24,7 +24,7 @@ from tensorflow.contrib.training.python.training import sampling_ops from tensorflow.python.platform import tf_logging as logging -class SamplingOpsTest(tf.test.TestCase): +class StratifiedSampleTest(tf.test.TestCase): def testGraphBuildAssertionFailures(self): val = [tf.zeros([1, 3]), tf.ones([1, 5])] @@ -383,17 +383,79 @@ class SamplingOpsTest(tf.test.TestCase): self.normalBehaviorHelper(curried_sampler) + +class RejectionSampleTest(tf.test.TestCase): + + def testGraphConstructionFailures(self): + accept_prob_fn = lambda _: tf.constant(1.0) + batch_size = 32 + # Data must have batch dimension if `enqueue_many` is `True`. + with self.assertRaises(ValueError): + tf.contrib.training.rejection_sample( + [tf.zeros([])], accept_prob_fn, batch_size, enqueue_many=True) + + # Batch dimensions should be equal if `enqueue_many` is `True`. + with self.assertRaises(ValueError): + tf.contrib.training.rejection_sample( + [tf.zeros([5, 1]), tf.zeros([4, 1])], accept_prob_fn, batch_size, + enqueue_many=True) + + def testRuntimeFailures(self): + prob_ph = tf.placeholder(tf.float32, []) + accept_prob_fn = lambda _: prob_ph + batch_size = 32 + + # Set up graph. + tf.set_random_seed(1234) + tf.contrib.training.rejection_sample( + [tf.zeros([])], accept_prob_fn, batch_size, runtime_checks=True, + name='rejection_sample') + prob_tensor = tf.get_default_graph().get_tensor_by_name( + 'rejection_sample/prob_with_checks:0') + + # Run session that should fail. + with self.test_session() as sess: + for illegal_prob in [-0.1, 1.1]: + with self.assertRaises(tf.errors.InvalidArgumentError): + sess.run(prob_tensor, feed_dict={prob_ph: illegal_prob}) + + def testNormalBehavior(self): + tensor_list = [tf.cond( + tf.greater(.5, tf.random_uniform([])), + lambda: tf.constant(1.0), + lambda: tf.constant(2.0))] + accept_prob_fn = lambda x: x[0] - 1.0 + batch_size = 10 + + # Set up graph. 
+ sample = tf.contrib.training.rejection_sample( + tensor_list, accept_prob_fn, batch_size) + + with self.test_session() as sess: + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(coord=coord) + + for _ in range(5): + sample_np = sess.run(sample)[0] + self.assertListEqual([2.0] * batch_size, list(sample_np)) + + coord.request_stop() + coord.join(threads) + + +class ConditionalBatchTest(tf.test.TestCase): + def testConditionallyEnqueueAndBatch(self): tf.set_random_seed(1234) tensor = tf.cond( tf.greater(.5, tf.random_uniform([])), lambda: tf.constant(1.0), lambda: tf.constant(2.0)) - accept_prob = tensor - 1 + keep_input = tf.equal(tensor, 2.0) batch_size = 4 # Set up the test graph. - [batch] = sampling_ops._conditional_batch([tensor], accept_prob, batch_size) # pylint: disable=protected-access + [batch] = sampling_ops._conditional_batch([tensor], keep_input, batch_size) # pylint: disable=protected-access # Check conditional operation. with self.test_session(): @@ -411,13 +473,13 @@ class SamplingOpsTest(tf.test.TestCase): def testConditionallyEnqueueAndBatchTypes(self): tensor = tf.constant(1.0) - accept_prob = tensor - 1 + keep_input = tf.constant(True) batch_size = 4 # Check that output types are the same for 1 and 2-length input lists. - output1 = sampling_ops._conditional_batch([tensor], accept_prob, batch_size) # pylint: disable=protected-access + output1 = sampling_ops._conditional_batch([tensor], keep_input, batch_size) # pylint: disable=protected-access output2 = sampling_ops._conditional_batch( # pylint: disable=protected-access - [tensor, tensor], accept_prob, batch_size) + [tensor, tensor], keep_input, batch_size) self.assertEqual(type(output1), type(output2)) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index afae52dd3d3..f32cd0e6fc8 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -712,6 +712,7 @@ cc_library( name = "android_tensorflow_lib_lite", srcs = if_android(["//tensorflow/core:android_srcs"]), copts = tf_copts() + ["-Os"], + linkopts = ["-lz"], tags = [ "manual", "notap", @@ -769,7 +770,6 @@ cc_library( name = "android_tensorflow_lib", srcs = if_android([":android_op_registrations_and_gradients"]), copts = tf_copts(), - linkopts = ["-lz"], tags = [ "manual", "notap", @@ -1093,12 +1093,12 @@ tf_cuda_library( ], ) + select({ "//tensorflow:windows": [], - "//conditions:default": glob([ + "//conditions:default": [ "util/memmapped_file_system.h", "util/memmapped_file_system.cc", "util/memmapped_file_system_writer.h", "util/memmapped_file_system_writer.cc", - ]), + ], }), hdrs = [ "framework/op_segment.h", @@ -1107,6 +1107,7 @@ tf_cuda_library( "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", "util/command_line_flags.h", + "util/env_var.h", "util/presized_cuckoo_map.h", "util/tensor_slice_set.h", "util/tensor_slice_util.h", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 848a71c4746..59fa09bd8db 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -435,9 +435,19 @@ Status DirectSession::Run(const RunOptions& run_options, } const bool do_trace = (run_options.trace_level() > RunOptions::NO_TRACE); - const int64 build_cost_model = - options_.config.graph_options().build_cost_model(); - if (do_trace || build_cost_model > 0) { + + bool update_cost_model = false; + if (options_.config.graph_options().build_cost_model() > 0) { + const int64 
build_cost_model_every = + options_.config.graph_options().build_cost_model(); + const int64 build_cost_model_after = + options_.config.graph_options().build_cost_model_after(); + update_cost_model = + ((executors_and_keys->step_count + 1 - build_cost_model_after) % + build_cost_model_every == + 0); + } + if (do_trace || update_cost_model) { run_state.collector.reset( new StepStatsCollector(run_metadata->mutable_step_stats())); args.stats_collector = run_state.collector.get(); @@ -479,7 +489,7 @@ Status DirectSession::Run(const RunOptions& run_options, // Build and return the cost model as instructed. mutex_lock l(executor_lock_); ++executors_and_keys->step_count; - if (executors_and_keys->step_count == build_cost_model) { + if (update_cost_model) { // Build the cost model std::unordered_map device_to_graph; for (const PerPartitionExecutorsAndLib& partition : diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index ccc962927b9..a6445fd0403 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -646,7 +646,7 @@ class ExecutorState { int64 iteration_count GUARDED_BY(mu) = 0; // The number of outstanding iterations. - int num_outstanding_iterations GUARDED_BY(mu); + int num_outstanding_iterations GUARDED_BY(mu) = 1; // The active iteration states of this frame. gtl::InlinedVector iterations; @@ -1193,8 +1193,8 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { (first_input + i)->ClearVal(); } FrameState* input_frame = state->tagged_node.input_frame; - int64 input_iter = state->tagged_node.input_iter; - int id = state->tagged_node.node->id(); + const int64 input_iter = state->tagged_node.input_iter; + const int id = state->tagged_node.node->id(); MaybeMarkCompleted(input_frame, input_iter, id); TaggedNodeSeq ready; if (s.ok()) { @@ -1554,120 +1554,6 @@ void ExecutorState::PropagateOutputs(const TaggedNode& tagged_node, } } -void ExecutorState::FrameState::ActivateNodes(const Node* node, - const bool is_dead, int64 iter, - const EntryVector& outputs, - TaggedNodeSeq* ready) { - const NodeItem* nodes = executor->nodes_; - IterationState* iter_state = GetIteration(iter); - for (const Edge* e : node->out_edges()) { - const Node* dst_node = e->dst(); - const int dst_id = dst_node->id(); - const int src_slot = e->src_output(); - - bool dst_dead = false; - bool dst_ready = false; - // True iff this input for dst is needed. We only set this input for - // dst if this flag is true. This is needed to make the thread safety - // analysis happy. - bool dst_need_input = !e->IsControlEdge(); - if (IsMerge(dst_node)) { - // A merge node is ready if all control inputs have arrived and either - // a) a live data input becomes available or b) all data inputs are dead. - // For Merge, pending's LSB is set iff a live data input has arrived. - if (e->IsControlEdge()) { - iter_state->decrement_pending(dst_id, 2); - int count = iter_state->pending(dst_id); - dst_dead = (iter_state->dead_count(dst_id) == dst_node->num_inputs()); - dst_ready = (count == 0) || ((count == 1) && dst_dead); - } else { - if (outputs[src_slot].has_value) { - // This is a live data input. - int count = iter_state->pending(dst_id); - iter_state->mark_live(dst_id); - // Only the first live edge sets the input and (potentially) - // triggers execution. The low bit of count is set if and - // only if no live input has been used yet (mark_live clears - // it). 
The node should be started if and only if this is - // the first live input and there are no pending control - // edges, i.e. count == 1. - dst_ready = (count == 1); - dst_need_input = ((count & 0x1) == 1); - } else { - // This is a dead data input. Note that dst_node is dead if node is - // a dead enter. We need this to handle properly a while loop on - // the untaken branch of a conditional. - // TODO(yuanbyu): This is a bit hacky, but a good solution for now. - iter_state->increment_dead_count(dst_id); - const int dead_cnt = iter_state->dead_count(dst_id); - dst_dead = (dead_cnt == dst_node->num_inputs()) || IsEnter(node); - dst_ready = (iter_state->pending(dst_id) == 1) && dst_dead; - dst_need_input = false; - } - } - } else { - // A non-merge node is ready if all its inputs are ready. We wait - // for all inputs to come in even if we know the node is dead. This - // ensures that all input tensors get cleaned up. - if (is_dead || (!e->IsControlEdge() && !outputs[src_slot].has_value)) { - iter_state->increment_dead_count(dst_id); - } - dst_dead = iter_state->dead_count(dst_id) > 0; - dst_ready = (iter_state->decrement_pending(dst_id, 1) == 0); - } - - if (dst_need_input) { - const NodeItem& dst_item = nodes[dst_id]; - const int dst_slot = e->dst_input(); - Entry* input_tensors = iter_state->input_tensors; - int dst_loc = dst_item.input_start + dst_slot; - input_tensors[dst_loc] = outputs[src_slot]; - } - - // Add dst to the ready queue if it's ready - if (dst_ready) { - dst_dead = dst_dead && !IsControlTrigger(dst_node); - ready->push_back(TaggedNode(dst_node, this, iter, dst_dead)); - iter_state->outstanding_ops++; - } - } -} - -void ExecutorState::FrameState::ActivateNexts(int64 iter, - TaggedNodeSeq* ready) { - // Propagate the deferred NextIteration nodes to the new iteration. - for (auto& node_entry : next_iter_roots) { - const Node* node = node_entry.first; - const Entry& entry = node_entry.second; - const bool is_dead = !entry.has_value; - ActivateNodes(node, is_dead, iter, {entry}, ready); - } - next_iter_roots.clear(); -} - -void ExecutorState::FrameState::ActivateLoopInvs(int64 iter, - TaggedNodeSeq* ready) { - // Propagate loop invariants to the new iteration. - for (auto& node_entry : inv_values) { - const Node* node = node_entry.first; - const Entry& entry = node_entry.second; - const bool is_dead = !entry.has_value; - ActivateNodes(node, is_dead, iter, {entry}, ready); - } -} - -void ExecutorState::FrameState::AddLoopInv(const Node* node, const Entry& entry, - TaggedNodeSeq* ready) { - // Store this value. - inv_values.push_back({node, entry}); - - // Make this value available to all iterations. - bool is_dead = !entry.has_value; - for (int i = 0; i <= iteration_count; ++i) { - ActivateNodes(node, is_dead, i, {entry}, ready); - } -} - bool ExecutorState::NodeDone(const Status& s, const Node* node, const TaggedNodeSeq& ready, NodeExecStats* stats, TaggedNodeReadyQueue* inline_ready) { @@ -1905,57 +1791,6 @@ void ExecutorState::Finish() { runner([=]() { done_cb(status); }); } -bool ExecutorState::FrameState::IsIterationDone(int64 iter) { - IterationState* iter_state = GetIteration(iter); - if (iter_state->outstanding_ops == 0 && - iter_state->outstanding_frame_count == 0) { - if (iter == 0) { - // The enclosing frame has no pending input. - return num_pending_inputs == 0; - } else { - // The preceding iteration is deleted (and therefore done). 
- return (GetIteration(iter - 1) == nullptr); - } - } - return false; -} - -void ExecutorState::FrameState::IncrementIteration(TaggedNodeSeq* ready) { - iteration_count++; - int64 next_iter = iteration_count; - - // Initialize the next iteration. - IterationState* iter_state = new IterationState(executor); - SetIteration(next_iter, iter_state); - num_outstanding_iterations++; - dead_exits.clear(); - - // Activate the successors of the deferred roots in the new iteration. - ActivateNexts(next_iter, ready); - - // Activate the loop invariants in the new iteration. - ActivateLoopInvs(next_iter, ready); -} - -bool ExecutorState::FrameState::CleanupIterations(int64 iter, - TaggedNodeSeq* ready) { - int64 curr_iter = iter; - while (curr_iter <= iteration_count && IsIterationDone(curr_iter)) { - // Delete the iteration curr_iter. - delete GetIteration(curr_iter); - SetIteration(curr_iter, nullptr); - --num_outstanding_iterations; - ++curr_iter; - - // When one iteration is completed, we check for deferred iteration, - // and start it if there is one. - if (!next_iter_roots.empty()) { - IncrementIteration(ready); - } - } - return IsFrameDone(); -} - void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter, const Node* node, FrameState** child) { @@ -2002,14 +1837,15 @@ void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter, auto it = outstanding_frames_.find(child_name); if (it != outstanding_frames_.end()) { *child = it->second; - delete temp; // Not used so delete it. } else { mutex_lock frame_lock(frame->mu); frame->GetIteration(iter)->outstanding_frame_count++; outstanding_frames_[child_name] = temp; *child = temp; + temp = nullptr; } } + delete temp; // Not used so delete it. } void ExecutorState::DeleteFrame(FrameState* frame, TaggedNodeSeq* ready) { @@ -2084,6 +1920,171 @@ void ExecutorState::CleanupFramesIterations(FrameState* frame, int64 iter, } } +void ExecutorState::FrameState::ActivateNodes(const Node* node, + const bool is_dead, int64 iter, + const EntryVector& outputs, + TaggedNodeSeq* ready) { + const NodeItem* nodes = executor->nodes_; + IterationState* iter_state = GetIteration(iter); + for (const Edge* e : node->out_edges()) { + const Node* dst_node = e->dst(); + const int dst_id = dst_node->id(); + const int src_slot = e->src_output(); + + bool dst_dead = false; + bool dst_ready = false; + // True iff this input for dst is needed. We only set this input for + // dst if this flag is true. This is needed to make the thread safety + // analysis happy. + bool dst_need_input = !e->IsControlEdge(); + if (IsMerge(dst_node)) { + // A merge node is ready if all control inputs have arrived and either + // a) a live data input becomes available or b) all data inputs are dead. + // For Merge, pending's LSB is set iff a live data input has arrived. + if (e->IsControlEdge()) { + iter_state->decrement_pending(dst_id, 2); + int count = iter_state->pending(dst_id); + dst_dead = (iter_state->dead_count(dst_id) == dst_node->num_inputs()); + dst_ready = (count == 0) || ((count == 1) && dst_dead); + } else { + if (outputs[src_slot].has_value) { + // This is a live data input. + int count = iter_state->pending(dst_id); + iter_state->mark_live(dst_id); + // Only the first live edge sets the input and (potentially) + // triggers execution. The low bit of count is set if and + // only if no live input has been used yet (mark_live clears + // it). 
The node should be started if and only if this is + // the first live input and there are no pending control + // edges, i.e. count == 1. + dst_ready = (count == 1); + dst_need_input = ((count & 0x1) == 1); + } else { + // This is a dead data input. Note that dst_node is dead if node is + // a dead enter. We need this to handle properly a while loop on + // the untaken branch of a conditional. + // TODO(yuanbyu): This is a bit hacky, but a good solution for now. + iter_state->increment_dead_count(dst_id); + const int dead_cnt = iter_state->dead_count(dst_id); + dst_dead = (dead_cnt == dst_node->num_inputs()) || IsEnter(node); + dst_ready = (iter_state->pending(dst_id) == 1) && dst_dead; + dst_need_input = false; + } + } + } else { + // A non-merge node is ready if all its inputs are ready. We wait + // for all inputs to come in even if we know the node is dead. This + // ensures that all input tensors get cleaned up. + if (is_dead || (!e->IsControlEdge() && !outputs[src_slot].has_value)) { + iter_state->increment_dead_count(dst_id); + } + dst_dead = iter_state->dead_count(dst_id) > 0; + dst_ready = (iter_state->decrement_pending(dst_id, 1) == 0); + } + + if (dst_need_input) { + const NodeItem& dst_item = nodes[dst_id]; + const int dst_slot = e->dst_input(); + Entry* input_tensors = iter_state->input_tensors; + int dst_loc = dst_item.input_start + dst_slot; + input_tensors[dst_loc] = outputs[src_slot]; + } + + // Add dst to the ready queue if it's ready + if (dst_ready) { + dst_dead = dst_dead && !IsControlTrigger(dst_node); + ready->push_back(TaggedNode(dst_node, this, iter, dst_dead)); + iter_state->outstanding_ops++; + } + } +} + +void ExecutorState::FrameState::ActivateNexts(int64 iter, + TaggedNodeSeq* ready) { + // Propagate the deferred NextIteration nodes to the new iteration. + for (auto& node_entry : next_iter_roots) { + const Node* node = node_entry.first; + const Entry& entry = node_entry.second; + const bool is_dead = !entry.has_value; + ActivateNodes(node, is_dead, iter, {entry}, ready); + } + next_iter_roots.clear(); +} + +void ExecutorState::FrameState::ActivateLoopInvs(int64 iter, + TaggedNodeSeq* ready) { + // Propagate loop invariants to the new iteration. + for (auto& node_entry : inv_values) { + const Node* node = node_entry.first; + const Entry& entry = node_entry.second; + const bool is_dead = !entry.has_value; + ActivateNodes(node, is_dead, iter, {entry}, ready); + } +} + +void ExecutorState::FrameState::AddLoopInv(const Node* node, const Entry& entry, + TaggedNodeSeq* ready) { + // Store this value. + inv_values.push_back({node, entry}); + + // Make this value available to all iterations. + bool is_dead = !entry.has_value; + for (int i = 0; i <= iteration_count; ++i) { + ActivateNodes(node, is_dead, i, {entry}, ready); + } +} + +bool ExecutorState::FrameState::IsIterationDone(int64 iter) { + IterationState* iter_state = GetIteration(iter); + if (iter_state->outstanding_ops == 0 && + iter_state->outstanding_frame_count == 0) { + if (iter == 0) { + // The enclosing frame has no pending input. + return num_pending_inputs == 0; + } else { + // The preceding iteration is deleted (and therefore done). + return (GetIteration(iter - 1) == nullptr); + } + } + return false; +} + +void ExecutorState::FrameState::IncrementIteration(TaggedNodeSeq* ready) { + iteration_count++; + int64 next_iter = iteration_count; + + // Initialize the next iteration. 
+ IterationState* iter_state = new IterationState(executor); + SetIteration(next_iter, iter_state); + num_outstanding_iterations++; + dead_exits.clear(); + + // Activate the successors of the deferred roots in the new iteration. + ActivateNexts(next_iter, ready); + + // Activate the loop invariants in the new iteration. + ActivateLoopInvs(next_iter, ready); +} + +bool ExecutorState::FrameState::CleanupIterations(int64 iter, + TaggedNodeSeq* ready) { + int64 curr_iter = iter; + while (curr_iter <= iteration_count && IsIterationDone(curr_iter)) { + // Delete the iteration curr_iter. + delete GetIteration(curr_iter); + SetIteration(curr_iter, nullptr); + --num_outstanding_iterations; + ++curr_iter; + + // When one iteration is completed, we check for deferred iteration, + // and start it if there is one. + if (!next_iter_roots.empty()) { + IncrementIteration(ready); + } + } + return IsFrameDone(); +} + void ExecutorImpl::RunAsync(const Args& args, DoneCallback done) { (new ExecutorState(args, this))->RunAsync(done); } diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc index 60da115988e..77dabdf4d16 100644 --- a/tensorflow/core/common_runtime/gpu/process_state.cc +++ b/tensorflow/core/common_runtime/gpu/process_state.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/env_var.h" // If these flags need to be runtime configurable, consider adding // options to ConfigProto. @@ -203,8 +204,17 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) { Allocator* allocator = nullptr; static constexpr bool kCudaHostMemoryUseBFC = true; if (kCudaHostMemoryUseBFC) { + // TODO(zheng-xq): evaluate whether 64GB by default is the best choice. + int64 cuda_host_mem_limit_in_mb = -1; + Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB", + 1LL << 16 /*64GB max by default*/, + &cuda_host_mem_limit_in_mb); + if (!status.ok()) { + LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message(); + } + int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20); allocator = - new BFCAllocator(new CUDAHostAllocator(se), 1LL << 36 /*64GB max*/, + new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit, true /*allow_growth*/, "cuda_host_bfc" /*name*/); } else { allocator = new PoolAllocator( diff --git a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc index c568896de7f..eef32e799eb 100644 --- a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc +++ b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/local_device.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_segment.h" @@ -46,6 +47,9 @@ Benchmark::Benchmark(const string& device, Graph* g, testing::StopTiming(); string t = str_util::Uppercase(device); + // Allow NewDevice to allocate a new threadpool with different number of + // threads for each new benchmark. 
+ LocalDevice::set_use_global_threadpool(false); device_ = DeviceFactory::NewDevice(t, *options, "/job:localhost/replica:0/task:0"); CHECK(device_) << "Could not create a " << device << " device"; diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc index bbd04e2dbbd..e55456c039a 100644 --- a/tensorflow/core/common_runtime/local_device.cc +++ b/tensorflow/core/common_runtime/local_device.cc @@ -26,42 +26,60 @@ limitations under the License. namespace tensorflow { -namespace { +/* static */ +bool LocalDevice::use_global_threadpool_ = true; -DeviceBase::CpuWorkerThreads eigen_worker_threads; -Eigen::ThreadPoolInterface* eigen_thread_pool = nullptr; -Eigen::ThreadPoolDevice* eigen_device = nullptr; - -static bool InitModule(const SessionOptions& options) { - int32 intra_op_parallelism_threads = - options.config.intra_op_parallelism_threads(); - if (intra_op_parallelism_threads == 0) { - intra_op_parallelism_threads = port::NumSchedulableCPUs(); +struct LocalDevice::EigenThreadPoolInfo { + EigenThreadPoolInfo(const SessionOptions& options) { + int32 intra_op_parallelism_threads = + options.config.intra_op_parallelism_threads(); + if (intra_op_parallelism_threads == 0) { + intra_op_parallelism_threads = port::NumSchedulableCPUs(); + } + VLOG(1) << "Local device intra op parallelism threads: " + << intra_op_parallelism_threads; + eigen_worker_threads_.num_threads = intra_op_parallelism_threads; + eigen_worker_threads_.workers = new thread::ThreadPool( + options.env, "Eigen", intra_op_parallelism_threads); + eigen_threadpool_wrapper_.reset( + new EigenThreadPoolWrapper(eigen_worker_threads_.workers)); + eigen_device_.reset(new Eigen::ThreadPoolDevice( + eigen_threadpool_wrapper_.get(), eigen_worker_threads_.num_threads)); } - VLOG(1) << "Local device intra op parallelism threads: " - << intra_op_parallelism_threads; - eigen_worker_threads.num_threads = intra_op_parallelism_threads; - eigen_worker_threads.workers = new thread::ThreadPool( - options.env, "Eigen", intra_op_parallelism_threads); - eigen_thread_pool = new EigenThreadPoolWrapper(eigen_worker_threads.workers); - eigen_device = new Eigen::ThreadPoolDevice(eigen_thread_pool, - eigen_worker_threads.num_threads); - return true; -} -} // end namespace -// LocalDevice ---------------------------------------------------------------- + ~EigenThreadPoolInfo() { + eigen_threadpool_wrapper_.reset(); + eigen_device_.reset(); + delete eigen_worker_threads_.workers; + } + + DeviceBase::CpuWorkerThreads eigen_worker_threads_; + std::unique_ptr eigen_threadpool_wrapper_; + std::unique_ptr eigen_device_; +}; LocalDevice::LocalDevice(const SessionOptions& options, const DeviceAttributes& attributes, Allocator* device_allocator) - : Device(options.env, attributes, device_allocator) { - // All ThreadPoolDevices in the process will use this single fixed - // sized threadpool for numerical computations. - static bool init = InitModule(options); - CHECK(init); // Avoids compiler warning that init is unused. - set_tensorflow_cpu_worker_threads(&eigen_worker_threads); - set_eigen_cpu_device(eigen_device); + : Device(options.env, attributes, device_allocator), + owned_tp_info_(nullptr) { + LocalDevice::EigenThreadPoolInfo* tp_info; + if (use_global_threadpool_) { + // All ThreadPoolDevices in the process will use this single fixed + // sized threadpool for numerical computations. 
+ static LocalDevice::EigenThreadPoolInfo* global_tp_info = + new LocalDevice::EigenThreadPoolInfo(options); + tp_info = global_tp_info; + } else { + // Each LocalDevice owns a separate ThreadPoolDevice for numerical + // computations. + owned_tp_info_.reset(new LocalDevice::EigenThreadPoolInfo(options)); + tp_info = owned_tp_info_.get(); + } + set_tensorflow_cpu_worker_threads(&tp_info->eigen_worker_threads_); + set_eigen_cpu_device(tp_info->eigen_device_.get()); } +LocalDevice::~LocalDevice() {} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/local_device.h b/tensorflow/core/common_runtime/local_device.h index 147cd10ea9c..d1c27c62481 100644 --- a/tensorflow/core/common_runtime/local_device.h +++ b/tensorflow/core/common_runtime/local_device.h @@ -22,6 +22,9 @@ limitations under the License. namespace tensorflow { +namespace test { +class Benchmark; +} struct SessionOptions; // This class is shared by ThreadPoolDevice and GPUDevice and @@ -32,9 +35,20 @@ class LocalDevice : public Device { public: LocalDevice(const SessionOptions& options, const DeviceAttributes& attributes, Allocator* device_allocator); - ~LocalDevice() override {} + ~LocalDevice() override; private: + static bool use_global_threadpool_; + + static void set_use_global_threadpool(bool use_global_threadpool) { + use_global_threadpool_ = use_global_threadpool; + } + + struct EigenThreadPoolInfo; + std::unique_ptr owned_tp_info_; + + friend class test::Benchmark; + TF_DISALLOW_COPY_AND_ASSIGN(LocalDevice); }; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index c11f7c2c4b9..4752be41ff1 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -72,7 +72,6 @@ Status ShapeRefiner::AddNode(const Node* node) { // Create the inference context for this node with the existing input shapes. std::unique_ptr c( new shape_inference::InferenceContext(&node->def(), node->op_def(), - {} /* input_shapes_string */, input_shapes, input_tensors)); if (!c->construction_status().ok()) { return c->construction_status(); diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index c603c3c9dc3..74279e53309 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -264,22 +264,6 @@ Status GraphMgr::DeregisterAll() { return Status::OK(); } -Status GraphMgr::Execute(const string& handle, const int64 step_id, - const ExecutorOpts& opts, - StepStatsCollector* collector, - CancellationManager* cancellation_manager, - const NamedTensors& in, NamedTensors* out) { - Notification n; - Status status; - ExecuteAsync(handle, step_id, opts, collector, cancellation_manager, in, out, - [&n, &status](const Status& s) { - status = s; - n.Notify(); - }); - n.WaitForNotification(); - return status; -} - void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, const ExecutorOpts& opts, StepStatsCollector* collector, diff --git a/tensorflow/core/distributed_runtime/graph_mgr.h b/tensorflow/core/distributed_runtime/graph_mgr.h index 9e9c97215e7..87499995ab5 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.h +++ b/tensorflow/core/distributed_runtime/graph_mgr.h @@ -77,13 +77,6 @@ class GraphMgr { const NamedTensors& in, NamedTensors* out, StatusCallback done); - // Synchronous wrapper. 
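Note: the LocalDevice change above replaces the single process-wide static Eigen threadpool with a choice between a lazily created global EigenThreadPoolInfo and a per-device one held in owned_tp_info_; set_use_global_threadpool(false) is private and exposed only to test::Benchmark through the friend declaration, so benchmarks can give each new device its own pool. A minimal sketch of the same global-versus-owned pattern, using hypothetical names (Dev, PoolInfo), not the real classes:

    #include <memory>

    struct PoolInfo { /* worker threads, Eigen device, ... */ };

    class Dev {
     public:
      Dev() {
        if (use_global_) {
          static PoolInfo* global = new PoolInfo;  // shared by all Dev instances, never freed
          pool_ = global;
        } else {
          owned_.reset(new PoolInfo);              // lifetime tied to this Dev
          pool_ = owned_.get();
        }
      }
      static void set_use_global(bool b) { use_global_ = b; }

     private:
      static bool use_global_;
      std::unique_ptr<PoolInfo> owned_;
      PoolInfo* pool_ = nullptr;
    };
    bool Dev::use_global_ = true;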
- Status Execute(const string& handle, const int64 step_id, - const ExecutorOpts& opts, - StepStatsCollector* step_stats_collector, - CancellationManager* cancellation_manager, - const NamedTensors& in, NamedTensors* out); - // Deregisters a graph. Status Deregister(const string& handle); diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index c1249474110..6183e9fe26b 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -457,7 +457,7 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides)); if (strides.size() != 4) { return errors::InvalidArgument( - "AvgPool requires the stride attribute to contain 4 values, but " + "MaxPool requires the stride attribute to contain 4 values, but " "got: ", strides.size()); } @@ -466,7 +466,7 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->GetAttr("ksize", &kernel_sizes)); if (kernel_sizes.size() != 4) { return errors::InvalidArgument( - "AvgPool requires the ksize attribute to contain 4 values, but got: ", + "MaxPool requires the ksize attribute to contain 4 values, but got: ", kernel_sizes.size()); } diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc index 68b1e6cbc1d..a4efc04467c 100644 --- a/tensorflow/core/framework/common_shape_fns_test.cc +++ b/tensorflow/core/framework/common_shape_fns_test.cc @@ -24,6 +24,24 @@ limitations under the License. namespace tensorflow { namespace shape_inference { +namespace { + +TensorShapeProto S(std::initializer_list dims) { + PartialTensorShape shape(dims); + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + +TensorShapeProto Unknown() { + PartialTensorShape shape; + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + +} // namespace + TEST(CommonShapeFnsTest, NoOutputShapeTest) { OpRegistrationData op_reg_data; TF_CHECK_OK(OpDefBuilder("Assert") @@ -38,7 +56,7 @@ TEST(CommonShapeFnsTest, NoOutputShapeTest) { .Input({{"data", 0, DT_FLOAT}}) .Finalize(&def)); - InferenceContext c(&def, op_def, {"[]", "[10]"}, {}); + InferenceContext c(&def, op_def, {S({}), S({10})}, {}); TF_EXPECT_OK(NoOutputs(&c)); EXPECT_EQ(0, c.num_outputs()); } @@ -56,14 +74,14 @@ TEST(CommonShapeFnsTest, ScalarShapeTest) { NodeDefBuilder("test", "L2Loss").Input("t", 0, DT_FLOAT).Finalize(&def)); { - InferenceContext c(&def, op_def, {"[]"}, {}); + InferenceContext c(&def, op_def, {S({})}, {}); TF_EXPECT_OK(ScalarShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(0, c.Rank(output)); } { - InferenceContext c(&def, op_def, {"[1,23,4,4,2]"}, {}); + InferenceContext c(&def, op_def, {S({1, 23, 4, 4, 2})}, {}); TF_EXPECT_OK(ScalarShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(0, c.Rank(output)); @@ -90,7 +108,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {"[2,3]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -99,7 +117,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Unknown inner dimension for one - InferenceContext c(&def, op_def, {"[2,?]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, -1}), S({3, 4})}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -108,7 
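Note: the S() and Unknown() helpers introduced above replace the old string shape specs with TensorShapeProto values built via PartialTensorShape, where a dimension of -1 stands for "unknown". The subtle distinction worth keeping in mind is that S({}) is a known scalar shape (rank 0) while Unknown() has unknown rank. For reference, a few equivalences between the old specs and the new calls:

    old spec    new call          meaning
    "?"         Unknown()         unknown rank
    "[]"        S({})             scalar, rank 0
    "[2,?]"     S({2, -1})        rank 2, second dimension unknown
    "[1,?,3]"   S({1, -1, 3})     rank 3, middle dimension unknown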
+126,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Invalid rank. - InferenceContext c(&def, op_def, {"[2]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2}), S({3, 4})}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -118,7 +136,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Unknown outer dimension - InferenceContext c(&def, op_def, {"[2,3]", "[3,?]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3, -1})}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -127,7 +145,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Inner shapes not compatible - InferenceContext c(&def, op_def, {"[2,5]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, 5}), S({3, 4})}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -138,7 +156,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Inner shapes not compatible - InferenceContext c(&def, op_def, {"[2,5,3]", "[3,5,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, 5, 3}), S({3, 5, 4})}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -156,7 +174,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Attr("type", DT_FLOAT) .Finalize(&def)); - InferenceContext c(&def, op_def, {"[3,2]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({3, 2}), S({3, 4})}, {}); auto s = MatMulShape(&c); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -173,7 +191,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Attr("type", DT_FLOAT) .Finalize(&def)); - InferenceContext c(&def, op_def, {"[2,3]", "[4,3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({4, 3})}, {}); auto s = MatMulShape(&c); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -197,7 +215,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {"[2,10]", "[10]"}, {}); + InferenceContext c(&def, op_def, {S({2, 10}), S({10})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -206,7 +224,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Unknown ranks. 
- InferenceContext c(&def, op_def, {"?", "?"}, {}); + InferenceContext c(&def, op_def, {Unknown(), Unknown()}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_FALSE(c.RankKnown(output)); @@ -214,7 +232,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Rank > 2 - InferenceContext c(&def, op_def, {"[4,3,4,2,15]", "[15]"}, {}); + InferenceContext c(&def, op_def, {S({4, 3, 4, 2, 15}), S({15})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[4,3,4,2,15]", c.DebugString(output)); @@ -227,7 +245,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[2,3,4,5]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3, 4, 5}), S({3})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[2,3,4,5]", c.DebugString(output)); @@ -240,7 +258,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[8,6,4,2,3,4,5]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[8,6,4,2,3,4,5]", c.DebugString(output)); @@ -253,7 +271,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[10,11,12]", "[10]"}, {}); + InferenceContext c(&def, op_def, {S({10, 11, 12}), S({10})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[10,11,12]", c.DebugString(output)); @@ -261,7 +279,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Input rank not high enough - InferenceContext c(&def, op_def, {"[3]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({3}), S({3})}, {}); EXPECT_FALSE(BiasAddShape(&c).ok()); } @@ -273,7 +291,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Attr("data_format", "NCHW") .Finalize(&def)); // NCHW format - InferenceContext c(&def, op_def, {"[2,3]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3})}, {}); EXPECT_FALSE(BiasAddShape(&c).ok()); } } @@ -292,7 +310,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {"[2,10]"}, {}); + InferenceContext c(&def, op_def, {S({2, 10})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -300,7 +318,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { { // Rank > 2 - InferenceContext c(&def, op_def, {"[5,7,2,10]"}, {}); + InferenceContext c(&def, op_def, {S({5, 7, 2, 10})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -312,7 +330,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[2,3,4,5]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3, 4, 5})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(3, c.Value(c.Dim(output, 0))); @@ -324,7 +342,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[8,6,4,2,3,4,5]"}, {}); + InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle 
output = c.output(0); EXPECT_EQ(3, c.Value(c.Dim(output, 0))); @@ -336,7 +354,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[10,11,12]"}, {}); + InferenceContext c(&def, op_def, {S({10, 11, 12})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -344,7 +362,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { { // Input rank not high enough - InferenceContext c(&def, op_def, {"[3]"}, {}); + InferenceContext c(&def, op_def, {S({3})}, {}); EXPECT_FALSE(BiasAddGradShape(&c).ok()); } @@ -355,7 +373,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Attr("data_format", "NCHW") .Finalize(&def)); // NCHW format - InferenceContext c(&def, op_def, {"[2,3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3})}, {}); EXPECT_FALSE(BiasAddGradShape(&c).ok()); } } diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 8dca86f5abb..77a433ddcb5 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -29,26 +29,6 @@ constexpr int64 InferenceContext::kUnknownDim; InferenceContext::InferenceContext( const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes, - const std::vector& input_tensors) - : node_def_(*CHECK_NOTNULL(node_def)) { - PreInputInit(op_def, input_tensors); - - for (const string& spec : input_shapes) { - ShapeHandle shape; - construction_status_.Update(MakeShapeFromString(spec, &shape)); - if (!construction_status_.ok()) { - return; - } - inputs_.push_back(shape); - } - - PostInputInit(); -} - -InferenceContext::InferenceContext( - const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors) : node_def_(*CHECK_NOTNULL(node_def)) { @@ -67,7 +47,6 @@ InferenceContext::InferenceContext( InferenceContext::InferenceContext( const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors) : node_def_(*CHECK_NOTNULL(node_def)) { @@ -78,8 +57,6 @@ InferenceContext::InferenceContext( } InferenceContext::~InferenceContext() { - for (auto* s : all_shapes_) delete s; - for (auto* d : all_dims_) delete d; } Status InferenceContext::set_output(StringPiece output_name, @@ -209,11 +186,9 @@ Status InferenceContext::WithRank(ShapeHandle shape, int32 rank, std::vector dims; dims.reserve(rank); for (int i = 0; i < rank; ++i) { - all_dims_.push_back(new Dimension()); - dims.push_back(all_dims_.back()); + dims.push_back(UnknownDim()); } - all_shapes_.push_back(new Shape(dims)); - *out = all_shapes_.back(); + *out = shape_manager_.MakeShape(dims); return Status::OK(); } *out = nullptr; @@ -260,8 +235,7 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value, return Status::OK(); } if (existing == kUnknownDim) { - all_dims_.push_back(new Dimension(value)); - *out = all_dims_.back(); + *out = MakeDim(value); return Status::OK(); } *out = nullptr; @@ -454,8 +428,7 @@ Status InferenceContext::ReplaceDim(ShapeHandle s, int dim_index_in, ShapeHandle InferenceContext::MakeShape( const std::vector& dims) { - all_shapes_.push_back(new Shape(dims)); - return all_shapes_.back(); + return shape_manager_.MakeShape(dims); } ShapeHandle InferenceContext::MakeShape( @@ -465,12 +438,12 @@ ShapeHandle 
InferenceContext::MakeShape( for (const DimensionOrConstant& d : dims) { dims_actual.push_back(MakeDim(d)); } - return MakeShape(dims_actual); + + return shape_manager_.MakeShape(dims_actual); } ShapeHandle InferenceContext::UnknownShape() { - all_shapes_.push_back(new Shape()); - return all_shapes_.back(); + return shape_manager_.UnknownShape(); } ShapeHandle InferenceContext::UnknownShapeOfRank(int32 rank) { @@ -718,43 +691,6 @@ Status InferenceContext::Max(DimensionHandle first, DimensionOrConstant second, return Status::OK(); } -Status InferenceContext::MakeShapeFromString(const string& spec, - ShapeHandle* output) { - if (spec == "?") { - *output = UnknownShape(); - return Status::OK(); - } - - std::vector dims; - strings::Scanner scanner(spec); - scanner.OneLiteral("["); - while (scanner.Peek() != ']') { - if (scanner.Peek() == '?') { - scanner.OneLiteral("?"); - dims.push_back(UnknownDim()); - } else { - scanner.RestartCapture().Many(strings::Scanner::DIGIT); - StringPiece match; - int64 dim_size = 0; - CHECK(scanner.GetResult(nullptr, &match) && - strings::safe_strto64(match, &dim_size)) - << spec; - dims.push_back(MakeDim(dim_size)); - } - - if (scanner.Peek() == ',') { - scanner.OneLiteral(","); - } else if (scanner.Peek() != ']') { - return errors::InvalidArgument( - "Invalid input spec (] not found in dim shape): ", spec); - } - } - CHECK(scanner.OneLiteral("]").Eos().GetResult()); - *output = MakeShape(dims); - - return Status::OK(); -} - Status InferenceContext::AttachContext(const Status& status) { std::vector input_shapes; for (const ShapeHandle& input_shape : inputs_) { @@ -768,5 +704,25 @@ Status InferenceContext::AttachContext(const Status& status) { strings::StrCat(status.error_message(), error_context)); } +// ----------------------------------------------------------------------------- +// ShapeManager +// ----------------------------------------------------------------------------- +InferenceContext::ShapeManager::ShapeManager() {} +InferenceContext::ShapeManager::~ShapeManager() { + for (auto* s : all_shapes_) delete s; + for (auto* d : all_dims_) delete d; +} + +ShapeHandle InferenceContext::ShapeManager::MakeShape( + const std::vector& dims) { + all_shapes_.push_back(new Shape(dims)); + return all_shapes_.back(); +} + +ShapeHandle InferenceContext::ShapeManager::UnknownShape() { + all_shapes_.push_back(new Shape()); + return all_shapes_.back(); +} + } // namespace shape_inference } // namespace tensorflow diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index 7ed4a85b5dd..ccb0dc91d0e 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -41,6 +41,7 @@ class Dimension { const int64 value_; friend class InferenceContext; + friend class ShapeManager; TF_DISALLOW_COPY_AND_ASSIGN(Dimension); }; @@ -61,6 +62,7 @@ class DimensionHandle { friend class InferenceContext; friend class ShapeInferenceTest; friend class ShapeInferenceTestutil; + friend class ShapeManager; // Intentionally copyable. }; @@ -76,6 +78,7 @@ class Shape { const std::vector dims_; friend class InferenceContext; + friend class ShapeManager; TF_DISALLOW_COPY_AND_ASSIGN(Shape); }; @@ -95,6 +98,7 @@ class ShapeHandle { friend class InferenceContext; friend class ShapeInferenceTest; friend class ShapeInferenceTestutil; + friend class ShapeManager; // Intentionally copyable. }; @@ -134,43 +138,17 @@ class InferenceContext { // is NULL-padded to be the same size as . 
// // REQUIRES: is not NULL, and must outlive the InferenceContext. - // - // TODO(vrv): Remove 'input_shapes_string' once we can move the - // creation of Shapes from strings out of this class (or hide it). InferenceContext(const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors); // is NULL-padded to be the same size as . // // REQUIRES: is not NULL, and must outlive the InferenceContext. - // - // TODO(cwhipkey): Remove 'input_shapes_string' once we can move the creation - // of Shapes from strings out of this class (or hide it). InferenceContext(const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors); - // This is a temporary constructor used for initial testing. - // - // TODO(cwhipkey): remove this temporary constructor. - // - // Each input shape describes the input shape as follows: - // * "?" : the shape's rank and dimensions are unknown - // * "[1,?,3]" : the shape's rank is known, and dimensions can be known or - // unknown (? for unknown #1 - multiple dimensions can be - // labeled with the same unknown number, and are deduplicated to - // the same Dimension*. - // - // is NULL-padded to be the same size as . - // - // REQUIRES: is not NULL, and must outlive the InferenceContext. - InferenceContext(const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes, - const std::vector& input_tensors); - ~InferenceContext(); // Runs the shape inference function 'fn' with 'this' as the @@ -340,13 +318,9 @@ class InferenceContext { // Returns a new dimension of the given size. The returned value is owned by // this context. inline DimensionHandle MakeDim(DimensionOrConstant d) { - if (d.dim.IsSet()) { - return d.dim; - } else { - all_dims_.push_back(new Dimension(d.val)); - return all_dims_.back(); - } + return shape_manager_.MakeDim(d); } + inline DimensionHandle UnknownDim() { return MakeDim(kUnknownDim); } // Returns a new dimension whose value is given by a scalar input tensor. @@ -436,15 +410,43 @@ class InferenceContext { } private: + // Creates and stores shapes for use in InferenceContext. + class ShapeManager { + public: + ShapeManager(); + ~ShapeManager(); + + // Returns a new shape with the given dims. The returned value is owned by + // this class. + ShapeHandle MakeShape(const std::vector& dims); + + // Returns a new unknown shape. + ShapeHandle UnknownShape(); + + // Returns a new dimension of the given size. The returned value + // is owned by this class. + inline DimensionHandle MakeDim(DimensionOrConstant d) { + if (d.dim.IsSet()) { + return d.dim; + } else { + all_dims_.push_back(new Dimension(d.val)); + return all_dims_.back(); + } + } + + private: + std::vector all_shapes_; // values are owned. + std::vector all_dims_; // values are owned. + }; + + friend class ShapeInferenceTestutil; // For testing shapes. + // Shared initialization across the two constructors. Remove // once we get rid of one of them. void PreInputInit(const OpDef& op_def, const std::vector& input_tensors); void PostInputInit(); - // Returns a shape from 'shape_string'. - Status MakeShapeFromString(const string& shape_string, ShapeHandle* output); - DimensionHandle GetDimension(const DimensionOrConstant& d); Status ReturnUnknownShape(ShapeHandle* out) { @@ -460,10 +462,9 @@ class InferenceContext { // Adds additional context to the given status. 
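Note: the new InferenceContext::ShapeManager above takes over ownership of the Shape and Dimension objects that were previously appended to all_shapes_/all_dims_ on the context and deleted in ~InferenceContext; MakeShape/MakeDim hand out handles that stay valid for as long as the manager lives. A minimal sketch of the same owning-arena idea, with hypothetical names (ShapeArena, Dim, Shape), not the real classes:

    #include <memory>
    #include <utility>
    #include <vector>

    // Handles are raw pointers whose lifetime is tied to the arena, mirroring
    // ShapeHandle/DimensionHandle pointing into ShapeManager-owned storage.
    struct Dim { long long value; };
    struct Shape { std::vector<const Dim*> dims; };

    class ShapeArena {
     public:
      const Dim* MakeDim(long long v) {
        dims_.emplace_back(new Dim{v});
        return dims_.back().get();
      }
      const Shape* MakeShape(std::vector<const Dim*> dims) {
        shapes_.emplace_back(new Shape{std::move(dims)});
        return shapes_.back().get();
      }

     private:
      std::vector<std::unique_ptr<Dim>> dims_;      // owned
      std::vector<std::unique_ptr<Shape>> shapes_;  // owned
    };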
Status AttachContext(const Status& status); - std::vector all_shapes_; // values are owned. - std::vector all_dims_; // values are owned. + ShapeManager shape_manager_; - // inputs_ and outputs_ refer to values from all_shapes_. + // inputs_ and outputs_ refer to values from `shape_manager_`. std::vector inputs_; std::vector input_tensors_; std::vector requested_input_tensor_; diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc index 2cd58638723..76a485c678f 100644 --- a/tensorflow/core/framework/shape_inference_test.cc +++ b/tensorflow/core/framework/shape_inference_test.cc @@ -36,6 +36,20 @@ OpDef MakeOpDefWithLists() { return op_reg_data.op_def; } +TensorShapeProto S(std::initializer_list dims) { + PartialTensorShape shape(dims); + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + +TensorShapeProto Unknown() { + PartialTensorShape shape; + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + } // namespace class ShapeInferenceTest : public ::testing::Test { @@ -57,7 +71,7 @@ TEST_F(ShapeInferenceTest, InputOutputByName) { .Attr("N", 3) .Input(FakeInput(DT_FLOAT)) .Finalize(&def); - InferenceContext c(&def, op_def, {"[1,5]", "[2,5]", "[1,3]"}, {}); + InferenceContext c(&def, op_def, {S({1, 5}), S({2, 5}), S({1, 3})}, {}); EXPECT_EQ("5", c.DebugString(c.NumElements(c.input(0)))); EXPECT_EQ("10", c.DebugString(c.NumElements(c.input(1)))); @@ -93,7 +107,7 @@ static OpDef MakeOpDef(int num_inputs, int num_outputs) { TEST_F(ShapeInferenceTest, DimensionOrConstant) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 1), {"?"}, {}); + InferenceContext c(&def, MakeOpDef(1, 1), {Unknown()}, {}); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(InferenceContext::kUnknownDim)); EXPECT_EQ(1, c.Value(1)); @@ -108,7 +122,7 @@ TEST_F(ShapeInferenceTest, Run) { NodeDef def; def.set_name("foo"); def.set_op("foo_op"); - InferenceContext c(&def, MakeOpDef(3, 2), {"[1]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), {S({1})}, {}); { auto fn = [](InferenceContext* c) { @@ -139,7 +153,8 @@ TEST_F(ShapeInferenceTest, Run) { TEST_F(ShapeInferenceTest, RankAndDimInspection) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"?", "[1,?,3]", "[]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), {Unknown(), S({1, -1, 3}), S({})}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(2, c.num_outputs()); @@ -179,7 +194,8 @@ TEST_F(ShapeInferenceTest, RankAndDimInspection) { TEST_F(ShapeInferenceTest, NumElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"?", "[1,?,3]", "[5,4,3,2]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), + {Unknown(), S({1, -1, 3}), S({5, 4, 3, 2})}, {}); EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(0)))); EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(1)))); @@ -192,7 +208,7 @@ TEST_F(ShapeInferenceTest, NumElements) { TEST_F(ShapeInferenceTest, WithRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"?", "[1,?,3]"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -230,7 +246,7 @@ TEST_F(ShapeInferenceTest, WithRank) { TEST_F(ShapeInferenceTest, WithRankAtMost) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"?", "[1,?,3]"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -268,7 +284,7 @@ TEST_F(ShapeInferenceTest, WithRankAtMost) { TEST_F(ShapeInferenceTest, 
WithRankAtLeast) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"?", "[1,?,3]"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -306,7 +322,7 @@ TEST_F(ShapeInferenceTest, WithRankAtLeast) { TEST_F(ShapeInferenceTest, WithValue) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, -1})}, {}); auto d0 = c.Dim(c.input(0), 0); auto d1 = c.Dim(c.input(0), 1); @@ -347,7 +363,7 @@ TEST_F(ShapeInferenceTest, WithValue) { TEST_F(ShapeInferenceTest, MergeDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[2,?,2,1,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({2, -1, 2, 1, -1})}, {}); auto d2 = c.Dim(c.input(0), 0); auto d_unknown = c.Dim(c.input(0), 1); @@ -394,7 +410,9 @@ TEST_F(ShapeInferenceTest, MergeDim) { TEST_F(ShapeInferenceTest, MergeShape) { NodeDef def; InferenceContext c(&def, MakeOpDef(7, 2), - {"?", "[1,2]", "[?,2]", "[1,?]", "[1,3]", "?", "[1]"}, {}); + {Unknown(), S({1, 2}), S({-1, 2}), S({1, -1}), S({1, 3}), + Unknown(), S({1})}, + {}); auto s_unknown = c.input(0); auto s_1_2 = c.input(1); @@ -461,7 +479,10 @@ TEST_F(ShapeInferenceTest, MergeShape) { TEST_F(ShapeInferenceTest, MergePrefix) { NodeDef def; - InferenceContext c(&def, MakeOpDef(4, 2), {"?", "[?,2]", "[1,?,3]", "[2,4]"}, + InferenceContext c(&def, MakeOpDef(4, 2), + { + Unknown(), S({-1, 2}), S({1, -1, 3}), S({2, 4}), + }, {}); auto s_unknown = c.input(0); @@ -514,7 +535,8 @@ TEST_F(ShapeInferenceTest, MergePrefix) { TEST_F(ShapeInferenceTest, Subshape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"[1,2,3,?,5]", "?"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {S({1, 2, 3, -1, 5}), Unknown()}, + {}); ShapeHandle unknown = c.input(1); ShapeHandle out; @@ -588,7 +610,8 @@ TEST_F(ShapeInferenceTest, Subshape) { TEST_F(ShapeInferenceTest, Concatenate) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"[1,?,3]", "[4,5]", "?"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), + {S({1, -1, 3}), S({4, 5}), Unknown()}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -614,7 +637,7 @@ TEST_F(ShapeInferenceTest, Concatenate) { TEST_F(ShapeInferenceTest, ReplaceDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 0), {"[1,2,3]", "?"}, {}); + InferenceContext c(&def, MakeOpDef(2, 0), {S({1, 2, 3}), Unknown()}, {}); auto in = c.input(0); auto unknown = c.input(1); @@ -645,7 +668,7 @@ TEST_F(ShapeInferenceTest, ReplaceDim) { TEST_F(ShapeInferenceTest, MakeShape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,2,3,?,5]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, 3, -1, 5})}, {}); std::vector dims; auto in0 = c.input(0); @@ -669,7 +692,8 @@ TEST_F(ShapeInferenceTest, MakeShape) { TEST_F(ShapeInferenceTest, UnknownShape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto u0 = c.UnknownShape(); auto u1 = c.UnknownShape(); @@ -680,7 +704,8 @@ TEST_F(ShapeInferenceTest, UnknownShape) { TEST_F(ShapeInferenceTest, Scalar) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto s0 = c.Scalar(); EXPECT_EQ("[]", c.DebugString(s0)); @@ -690,7 +715,8 @@ TEST_F(ShapeInferenceTest, Scalar) { TEST_F(ShapeInferenceTest, Vector) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, 
{}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto s0 = c.Vector(1); EXPECT_EQ("[1]", c.DebugString(s0)); @@ -705,7 +731,8 @@ TEST_F(ShapeInferenceTest, Vector) { TEST_F(ShapeInferenceTest, Matrix) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto s0 = c.Matrix(1, 2); EXPECT_EQ("[1,2]", c.DebugString(s0)); @@ -727,7 +754,7 @@ TEST_F(ShapeInferenceTest, Matrix) { TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { auto create = [&](Tensor* t) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 0), {"?"}, {t}); + InferenceContext c(&def, MakeOpDef(1, 0), {Unknown()}, {t}); ShapeHandle out; Status s = c.MakeShapeFromShapeTensor(0, &out); if (s.ok()) { @@ -766,7 +793,7 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { // Test when the input shape is wrong. { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 0), {"[1,?]"}, {nullptr}); + InferenceContext c(&def, MakeOpDef(1, 0), {S({1, -1})}, {nullptr}); ShapeHandle out; EXPECT_EQ("Shape must be rank 1 but is rank 2", c.MakeShapeFromShapeTensor(0, &out).error_message()); @@ -775,7 +802,8 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); TensorShapeProto proto; // With a set unknown rank. @@ -810,7 +838,8 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) { TEST_F(ShapeInferenceTest, MakeDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto d0 = c.MakeDim(1); auto d1 = c.MakeDim(1); @@ -823,7 +852,8 @@ TEST_F(ShapeInferenceTest, MakeDim) { TEST_F(ShapeInferenceTest, UnknownDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto d0 = c.UnknownDim(); auto d1 = c.UnknownDim(); @@ -834,7 +864,8 @@ TEST_F(ShapeInferenceTest, UnknownDim) { TEST_F(ShapeInferenceTest, UnknownShapeOfRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto unknown_shape_of_rank_3 = c.UnknownShapeOfRank(3); EXPECT_EQ("[?,?,?]", c.DebugString(unknown_shape_of_rank_3)); @@ -847,7 +878,8 @@ TEST_F(ShapeInferenceTest, InputTensors) { const Tensor t1 = tensorflow::test::AsTensor({10}); const Tensor t2 = tensorflow::test::AsTensor({20, 30}); NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"[1]", "[2]", "[3]"}, {&t1, &t2}); + InferenceContext c(&def, MakeOpDef(3, 2), {S({1}), S({2}), S({3})}, + {&t1, &t2}); EXPECT_TRUE(c.input_tensor(0) == &t1); EXPECT_TRUE(c.input_tensor(1) == &t2); @@ -858,7 +890,7 @@ TEST_F(ShapeInferenceTest, MakeDimForScalarInput) { Tensor t1 = tensorflow::test::AsScalar(20); Tensor t2 = tensorflow::test::AsScalar(-1); NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"[]", "[]"}, {&t1, &t2}); + InferenceContext c(&def, MakeOpDef(2, 2), {S({}), S({})}, {&t1, &t2}); DimensionHandle d; EXPECT_TRUE(c.MakeDimForScalarInput(0, &d).ok()); @@ -888,7 +920,8 @@ TEST_F(ShapeInferenceTest, GetAttr) { .Finalize(&def) .ok()); - InferenceContext c(&def, op_reg_data.op_def, {}, {}); + std::vector empty; + InferenceContext c(&def, op_reg_data.op_def, empty, {}); string value; EXPECT_TRUE(c.GetAttr("foo", 
&value).ok()); EXPECT_EQ("bar", value); @@ -896,7 +929,7 @@ TEST_F(ShapeInferenceTest, GetAttr) { TEST_F(ShapeInferenceTest, Divide) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -946,7 +979,7 @@ TEST_F(ShapeInferenceTest, Divide) { TEST_F(ShapeInferenceTest, Add) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?,0]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -997,7 +1030,7 @@ TEST_F(ShapeInferenceTest, Add) { TEST_F(ShapeInferenceTest, Subtract) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?,0,5]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 5})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1046,7 +1079,7 @@ TEST_F(ShapeInferenceTest, Subtract) { TEST_F(ShapeInferenceTest, Multiply) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?,0,1]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 1})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1098,7 +1131,8 @@ TEST_F(ShapeInferenceTest, Multiply) { TEST_F(ShapeInferenceTest, FullyDefined) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); // No rank or missing dimension information should return false. EXPECT_FALSE(c.FullyDefined(c.UnknownShape())); @@ -1111,7 +1145,7 @@ TEST_F(ShapeInferenceTest, FullyDefined) { TEST_F(ShapeInferenceTest, Min) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,2,?,0]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1, 0})}, {}); auto s = c.input(0); auto d_1 = c.Dim(s, 0); @@ -1159,7 +1193,7 @@ TEST_F(ShapeInferenceTest, Min) { TEST_F(ShapeInferenceTest, Max) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,2,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1})}, {}); auto s = c.input(0); auto d_1 = c.Dim(s, 0); @@ -1196,7 +1230,8 @@ TEST_F(ShapeInferenceTest, Max) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"?", "?", "?"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {Unknown(), Unknown(), Unknown()}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1208,7 +1243,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[?,?]", "[?]", "[?]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, -1}), S({-1}), S({-1})}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1220,7 +1256,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[?]", "[?]", "[?]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1}), S({-1}), S({-1})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1233,7 +1269,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[4]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({4}), S({3})}, {}); EXPECT_EQ(3, 
c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1246,7 +1282,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[5]", "[4]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({4})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1259,7 +1295,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[?,3]", "[5]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, 3}), S({5}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1271,7 +1307,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[?]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({-1}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1283,7 +1319,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,?]", "[5]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, -1}), S({5}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1295,7 +1331,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[5]", "[?]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({-1})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1307,7 +1343,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[5]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); diff --git a/tensorflow/core/framework/shape_inference_testutil.cc b/tensorflow/core/framework/shape_inference_testutil.cc index 8d168620d0c..6cad1f8efaa 100644 --- a/tensorflow/core/framework/shape_inference_testutil.cc +++ b/tensorflow/core/framework/shape_inference_testutil.cc @@ -16,9 +16,9 @@ limitations under the License. 
#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -35,8 +35,16 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op, std::vector ins_v = str_util::Split(ins, ';'); std::unique_ptr new_node_def; - shape_inference::InferenceContext c(&op.node_def, op_reg_data->op_def, ins_v, - op.input_tensors); + InferenceContext::ShapeManager manager; + std::vector in_shapes; + for (const string& spec : ins_v) { + ShapeHandle shape; + TF_RETURN_IF_ERROR(MakeShapeFromString(&manager, spec, &shape)); + in_shapes.push_back(shape); + } + + shape_inference::InferenceContext c(&op.node_def, op_reg_data->op_def, + in_shapes, op.input_tensors); TF_RETURN_IF_ERROR(c.construction_status()); if (op_reg_data->shape_inference_fn == nullptr) { return errors::InvalidArgument( @@ -199,5 +207,49 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op, return Status::OK(); } +// static +Status ShapeInferenceTestutil::MakeShapeFromString( + InferenceContext::ShapeManager* manager, const string& spec, + ShapeHandle* output) { + if (spec == "?") { + *output = manager->UnknownShape(); + return Status::OK(); + } + + std::vector dims; + strings::Scanner scanner(spec); + scanner.OneLiteral("["); + while (scanner.Peek() != ']') { + if (scanner.Peek() == '?') { + scanner.OneLiteral("?"); + dims.push_back(manager->MakeDim(InferenceContext::kUnknownDim)); + } else { + scanner.RestartCapture().Many(strings::Scanner::DIGIT); + StringPiece match; + int64 dim_size = 0; + + if (!scanner.GetResult(nullptr, &match) || + !strings::safe_strto64(match, &dim_size)) { + return errors::InvalidArgument("Could not parse number in ", spec); + } + + dims.push_back(manager->MakeDim(dim_size)); + } + + if (scanner.Peek() == ',') { + scanner.OneLiteral(","); + } else if (scanner.Peek() != ']') { + return errors::InvalidArgument( + "Invalid input spec (] not found in dim shape): ", spec); + } + } + if (!scanner.OneLiteral("]").Eos().GetResult()) { + return errors::InvalidArgument("Malformed shape spec: did not end in ']'."); + } + *output = manager->MakeShape(dims); + + return Status::OK(); +} + } // namespace shape_inference } // namespace tensorflow diff --git a/tensorflow/core/framework/shape_inference_testutil.h b/tensorflow/core/framework/shape_inference_testutil.h index b5d187405ad..64067464fb9 100644 --- a/tensorflow/core/framework/shape_inference_testutil.h +++ b/tensorflow/core/framework/shape_inference_testutil.h @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" @@ -65,6 +66,10 @@ class ShapeInferenceTestutil { private: ShapeInferenceTestutil() {} + + // Makes a shape out of 'spec'. 
+ static Status MakeShapeFromString(InferenceContext::ShapeManager* manager, + const string& spec, ShapeHandle* output); }; } // namespace shape_inference diff --git a/tensorflow/core/framework/summary.proto b/tensorflow/core/framework/summary.proto index 10ee1c8779a..3560b96dfcc 100644 --- a/tensorflow/core/framework/summary.proto +++ b/tensorflow/core/framework/summary.proto @@ -8,6 +8,13 @@ option java_package = "org.tensorflow.framework"; import "tensorflow/core/framework/tensor.proto"; +// Metadata associated with a series of Summary data +message SummaryDescription { + // Hint on how plugins should process the data in this series. + // Supported values include "scalar", "histogram", "image", "audio" + string type_hint = 1; +} + // Serialization format for histogram module in // core/lib/histogram/histogram.h message HistogramProto { diff --git a/tensorflow/core/framework/tensor_shape.cc b/tensorflow/core/framework/tensor_shape.cc index 4e1a99acd68..fde1916c088 100644 --- a/tensorflow/core/framework/tensor_shape.cc +++ b/tensorflow/core/framework/tensor_shape.cc @@ -33,7 +33,7 @@ static void AppendTo(const TensorShape& s, gtl::InlinedVector* vals) { } void TensorShape::CheckDimsEqual(int NDIMS) const { - CHECK_EQ(NDIMS, dims()) << "Asking for tensor of " << NDIMS << "dimensions" + CHECK_EQ(NDIMS, dims()) << "Asking for tensor of " << NDIMS << " dimensions" << " from a tensor of " << dims() << " dimensions"; } diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc index f9a7b3ee52d..b41e438b2b0 100644 --- a/tensorflow/core/kernels/aggregate_ops.cc +++ b/tensorflow/core/kernels/aggregate_ops.cc @@ -139,6 +139,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_ADDN_CPU); #if GOOGLE_CUDA REGISTER_ADDN(Eigen::half, GPU); REGISTER_ADDN(float, GPU); +REGISTER_ADDN(double, GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc new file mode 100644 index 00000000000..03d2b2c4423 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc @@ -0,0 +1,26 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h" + +namespace tensorflow { +namespace functor { +DEFINE_UNARY5(round, Eigen::half, float, double, int32, int64); +} // namespace functor +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc new file mode 100644 index 00000000000..0457f3931d8 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_round.cc @@ -0,0 +1,25 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +namespace tensorflow { +REGISTER5(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double, + int32, int64); +#if GOOGLE_CUDA +REGISTER5(UnaryOp, GPU, "Round", functor::round, Eigen::half, float, double, + int32, int64); +#endif +} // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 24dc93629b4..766c7152b00 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/tensor_types.h" @@ -236,6 +237,48 @@ struct functor_traits> { }; }; +#if EIGEN_COMP_GNUC && __cplusplus > 199711L +#define DISABLE_FLOAT_EQUALITY_WARNING \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop") +#else +#define DISABLE_FLOAT_EQUALITY_WARNING +#define ENABLE_FLOAT_EQUALITY_WARNING +#endif + +template +struct scalar_round_op_google { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& x) const { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), + NUMERIC_TYPE_MUST_BE_REAL) + + Scalar round_val; + round_val = Eigen::numext::floor(x); + const Scalar fraction = x - round_val; + if (fraction > Scalar(.5)) { + round_val += Scalar(1.0); + } else if (fraction == Scalar(.5)) { + const Scalar nearest_even_int = + round_val - Scalar(2) * Eigen::numext::floor(Scalar(.5) * x); + bool is_odd = (nearest_even_int == Scalar(1)); + if (is_odd) { + round_val += Scalar(1); + } + } + return round_val; + } +}; + +template +struct functor_traits> { + enum { Cost = 4 * NumTraits::AddCost, PacketAccess = false }; +}; + +#undef ENABLE_FLOAT_EQUALITY_WARNING +#undef DISABLE_FLOAT_EQUALITY_WARNING + } // end namespace internal } // end namespace Eigen @@ -398,6 +441,9 @@ struct isfinite : base, bool> {}; template struct floor : base> {}; +template +struct round : base> {}; + template struct ceil : base> {}; diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc index 5ff5c297fbd..54a9dd07b1f 100644 --- a/tensorflow/core/kernels/nn_ops_test.cc +++ b/tensorflow/core/kernels/nn_ops_test.cc @@ -192,6 +192,7 @@ static void BM_ConvFloat(int iters, int batch, int rows, int cols, int in_depth, TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph, g)); string device = use_gpu ? "gpu" : "cpu"; + testing::UseRealTime(); test::Benchmark(device, g, &options).Run(iters); testing::ItemsProcessed(num_ops * iters); } @@ -557,6 +558,7 @@ static void BM_ConvFloatDepthwise(int iters, int batch, int rows, int cols, TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph, g)); string device = use_gpu ? 
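Note: scalar_round_op_google above implements round-half-to-even (banker's rounding) rather than the half-away-from-zero behaviour of std::round: halfway cases go to the nearest even integer, so 2.5 -> 2, 3.5 -> 4, -2.5 -> -2. A small standalone check of the same rule using only the standard library (std::nearbyint under FE_TONEAREST rounds ties to even):

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    int main() {
      std::fesetround(FE_TONEAREST);          // round-to-nearest, ties to even
      assert(std::nearbyint(2.5) == 2.0);     // tie -> even
      assert(std::nearbyint(3.5) == 4.0);     // tie -> even
      assert(std::nearbyint(-2.5) == -2.0);   // tie -> even
      assert(std::nearbyint(2.4) == 2.0);
      assert(std::nearbyint(2.6) == 3.0);
      return 0;
    }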
"gpu" : "cpu"; + testing::UseRealTime(); test::Benchmark(device, g, &options).Run(iters); testing::ItemsProcessed(num_ops * iters); } @@ -1075,6 +1077,7 @@ static void BM_MaxPoolBk(int iters, int batch_size, int rows, int cols, Graph* g = new Graph(OpRegistry::Global()); TF_CHECK_OK(root.ToGraph(g)); string device = use_gpu ? "gpu" : "cpu"; + testing::UseRealTime(); test::Benchmark(device, g).Run(iters); testing::ItemsProcessed(batch_size * rows * cols * depth * iters); diff --git a/tensorflow/core/lib/core/blocking_counter.h b/tensorflow/core/lib/core/blocking_counter.h index ebe7de6b3be..b2411f5951f 100644 --- a/tensorflow/core/lib/core/blocking_counter.h +++ b/tensorflow/core/lib/core/blocking_counter.h @@ -16,40 +16,49 @@ limitations under the License. #ifndef TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_ #define TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_ +#include + #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/types.h" namespace tensorflow { class BlockingCounter { public: - BlockingCounter(int initial_count) : count_(initial_count) { - CHECK_GE(count_, 0); + BlockingCounter(int initial_count) + : state_(initial_count << 1), notified_(false) { + CHECK_GE(initial_count, 0); + DCHECK_EQ((initial_count << 1) >> 1, initial_count); } - ~BlockingCounter() {} + ~BlockingCounter() { DCHECK_EQ(state_ >> 1, 0); } inline void DecrementCount() { - mutex_lock l(mu_); - --count_; - CHECK(count_ >= 0); - if (count_ == 0) { - cond_var_.notify_all(); + unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; + if (v != 1) { + DCHECK_NE(((v + 2) & ~1), 0); + return; // either count has not dropped to 0, or waiter is not waiting } + mutex_lock l(mu_); + DCHECK(!notified_); + notified_ = true; + cond_var_.notify_all(); } inline void Wait() { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return; mutex_lock l(mu_); - while (count_ > 0) { + while (!notified_) { cond_var_.wait(l); } } private: - int count_; mutex mu_; condition_variable cond_var_; + std::atomic state_; // low bit is waiter flag + bool notified_; }; } // namespace tensorflow diff --git a/tensorflow/core/lib/core/blocking_counter_test.cc b/tensorflow/core/lib/core/blocking_counter_test.cc index 12a30af8210..af56f624e55 100644 --- a/tensorflow/core/lib/core/blocking_counter_test.cc +++ b/tensorflow/core/lib/core/blocking_counter_test.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/platform/test.h" - #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" namespace tensorflow { namespace { @@ -48,4 +48,28 @@ TEST(BlockingCounterTest, TestMultipleThread) { } } // namespace + +static void BM_BlockingCounter(int iters, int num_threads, + int shards_per_thread) { + testing::StopTiming(); + std::unique_ptr thread_pool( + new thread::ThreadPool(Env::Default(), "test", num_threads)); + const int num_shards = num_threads * shards_per_thread; + testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + BlockingCounter bc(num_shards); + for (int j = 0; j < num_threads; ++j) { + thread_pool->Schedule([&bc, shards_per_thread] { + for (int k = 0; k < shards_per_thread; ++k) { + bc.DecrementCount(); + } + }); + } + bc.Wait(); + } + testing::StopTiming(); +} + +BENCHMARK(BM_BlockingCounter)->RangePair(1, 12, 1, 1000); + } // namespace tensorflow diff --git a/tensorflow/core/lib/io/random_inputstream.cc b/tensorflow/core/lib/io/random_inputstream.cc index bb92f0f018d..8b8c1392a1d 100644 --- a/tensorflow/core/lib/io/random_inputstream.cc +++ b/tensorflow/core/lib/io/random_inputstream.cc @@ -19,8 +19,15 @@ limitations under the License. namespace tensorflow { namespace io { -RandomAccessInputStream::RandomAccessInputStream(RandomAccessFile* file) - : file_(file) {} +RandomAccessInputStream::RandomAccessInputStream(RandomAccessFile* file, + bool owns_file) + : file_(file), owns_file_(owns_file) {} + +RandomAccessInputStream::~RandomAccessInputStream() { + if (owns_file_) { + delete file_; + } +} Status RandomAccessInputStream::ReadNBytes(int64 bytes_to_read, string* result) { diff --git a/tensorflow/core/lib/io/random_inputstream.h b/tensorflow/core/lib/io/random_inputstream.h index 8ec64d3b2aa..09ebe9ba49e 100644 --- a/tensorflow/core/lib/io/random_inputstream.h +++ b/tensorflow/core/lib/io/random_inputstream.h @@ -26,8 +26,11 @@ namespace io { // RandomAccessInputStream is NOT safe for concurrent use by multiple threads. class RandomAccessInputStream : public InputStreamInterface { public: - // Does not take ownership of 'file'. 'file' must outlive *this. - explicit RandomAccessInputStream(RandomAccessFile* file); + // Does not take ownership of 'file' unless owns_file is set to true. 'file' + // must outlive *this. + RandomAccessInputStream(RandomAccessFile* file, bool owns_file = false); + + ~RandomAccessInputStream(); Status ReadNBytes(int64 bytes_to_read, string* result) override; @@ -43,6 +46,7 @@ class RandomAccessInputStream : public InputStreamInterface { private: RandomAccessFile* file_; // Not owned. int64 pos_ = 0; // Tracks where we are in the file. 
+ bool owns_file_ = false; }; } // namespace io diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt index ec0e7801b1e..1238de51c2c 100644 --- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt @@ -30770,6 +30770,43 @@ op { } } } +op { + name: "TensorSummary" + input_arg { + name: "tensor" + type_attr: "T" + } + output_arg { + name: "summary" + type: DT_STRING + } + attr { + name: "T" + type: "type" + } + attr { + name: "description" + type: "string" + default_value { + s: "" + } + } + attr { + name: "labels" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "display_name" + type: "string" + default_value { + s: "" + } + } +} op { name: "TextLineReader" output_arg { diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index 83d02a4954e..42bd12a5b3f 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -69,23 +69,17 @@ REGISTER_OP("TensorSummary") .Input("tensor: T") .Output("summary: string") .Attr("T: type") - .Attr("display_name: string") .Attr("description: string = ''") .Attr("labels: list(string) = []") + .Attr("display_name: string = ''") .SetShapeFn(shape_inference::ScalarShape) .Doc(R"doc( Outputs a `Summary` protocol buffer with a tensor. tensor: A tensor to serialize. -display_name: A name to associate with the data series. -description: An optional long description of the data being output. -labels: a list of strings used to specify how the data can be interpreted, e.g. - a string tensor containing jpg images should have 'encoding:image/jpg'; a - string tensor with foo protos should have 'encoding:proto/X/Y/foo.proto'; - a numeric tensor containing bounding boxes may have - 'bounding_box:x1,y1,x2,y2,'. If the tensor is a part of a group of related - outputs, that can be encoded through a 'group:$groupName/$roleInGroup' label. - Labels may be formatted as 'prefix:value'. The prefix may be re-used. +description: A json-encoded SummaryDescription proto. +labels: An unused list of strings. +display_name: An unused string. )doc"); REGISTER_OP("ScalarSummary") diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 1f9c47172e9..cc478e33f02 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -18921,18 +18921,13 @@ op { name: "T" type: "type" } - attr { - name: "display_name" - type: "string" - description: "A name to associate with the data series." - } attr { name: "description" type: "string" default_value { s: "" } - description: "An optional long description of the data being output." + description: "A json-encoded SummaryDescription proto." } attr { name: "labels" @@ -18941,7 +18936,15 @@ op { list { } } - description: "a list of strings used to specify how the data can be interpreted, e.g.\na string tensor containing jpg images should have \'encoding:image/jpg\'; a\nstring tensor with foo protos should have \'encoding:proto/X/Y/foo.proto\';\na numeric tensor containing bounding boxes may have\n\'bounding_box:x1,y1,x2,y2,\'. If the tensor is a part of a group of related\noutputs, that can be encoded through a \'group:$groupName/$roleInGroup\' label.\nLabels may be formatted as \'prefix:value\'. The prefix may be re-used." + description: "An unused list of strings." + } + attr { + name: "display_name" + type: "string" + default_value { + s: "" + } + description: "An unused string." 
} summary: "Outputs a `Summary` protocol buffer with a tensor." } diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index 454fb64e2c3..e7808ca08d5 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -92,6 +92,11 @@ LogMessageFatal::~LogMessageFatal() { abort(); } +void LogString(const char* fname, int line, int severity, + const string& message) { + LogMessage(fname, line, severity) << message; +} + template <> void MakeCheckOpValueString(std::ostream* os, const char& v) { if (v >= 32 && v <= 126) { diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index e4bd54cbb61..a5dd7b45c4a 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -315,6 +315,7 @@ Status ReadBinaryProto(Env* env, const string& fname, Status ReadTextProto(Env* env, const string& fname, ::tensorflow::protobuf::Message* proto) { +#if !defined(TENSORFLOW_LITE_PROTOS) std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewRandomAccessFile(fname, &file)); std::unique_ptr stream(new FileStream(file.get())); @@ -324,6 +325,9 @@ Status ReadTextProto(Env* env, const string& fname, return errors::DataLoss("Can't parse ", fname, " as text proto"); } return Status::OK(); +#else + return errors::Unimplemented("Can't parse text protos with protolite."); +#endif } } // namespace tensorflow diff --git a/tensorflow/core/platform/hexagon/gemm_wrapper.h b/tensorflow/core/platform/hexagon/soc_interface.h similarity index 79% rename from tensorflow/core/platform/hexagon/gemm_wrapper.h rename to tensorflow/core/platform/hexagon/soc_interface.h index b1c22bafdb4..26c0ca29b3d 100644 --- a/tensorflow/core/platform/hexagon/gemm_wrapper.h +++ b/tensorflow/core/platform/hexagon/soc_interface.h @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_PLATFORM_HEXAGON_GEMM_WRAPPER_H_ -#define TENSORFLOW_PLATFORM_HEXAGON_GEMM_WRAPPER_H_ +#ifndef TENSORFLOW_PLATFORM_HEXAGON_SOC_INTERFACE_H_ +#define TENSORFLOW_PLATFORM_HEXAGON_SOC_INTERFACE_H_ // Declaration of APIs provided by hexagon shared library. This header is shared // with both hexagon library built with qualcomm SDK and tensorflow. -// All functions defined here must have prefix "hexagon_gemm_wrapper" to avoid +// All functions defined here must have prefix "soc_interface" to avoid // naming conflicts. #ifdef __cplusplus extern "C" { @@ -26,14 +26,14 @@ extern "C" { // Returns the version of loaded hexagon wrapper shared library. // You should assert that the version matches the expected version before // calling APIs defined in this header. -int hexagon_gemm_wrapper_GetWrapperVersion(); +int soc_interface_GetWrapperVersion(); // Returns the version of hexagon binary. // You should assert that the version matches the expected version before // calling APIs defined in this header. 
-int hexagon_gemm_wrapper_GetHexagonBinaryVersion(); +int soc_interface_GetHexagonBinaryVersion(); // TODO(satok): Support gemm APIs via RPC #ifdef __cplusplus } #endif // __cplusplus -#endif // TENSORFLOW_PLATFORM_HEXAGON_GEMM_WRAPPER_H_ +#endif // TENSORFLOW_PLATFORM_HEXAGON_SOC_INTERFACE_H_ diff --git a/tensorflow/core/platform/logging.h b/tensorflow/core/platform/logging.h index 963dc798294..1ca36db548b 100644 --- a/tensorflow/core/platform/logging.h +++ b/tensorflow/core/platform/logging.h @@ -36,6 +36,14 @@ namespace port { void AdjustFilenameForLogging(string* filename); } // namespace port + +namespace internal { +// Emit "message" as a log message to the log for the specified +// "severity" as if it came from a LOG call at "fname:line" +void LogString(const char* fname, int line, int severity, + const string& message); +} // namespace internal + } // namespace tensorflow #endif // TENSORFLOW_PLATFORM_LOGGING_H_ diff --git a/tensorflow/core/platform/logging_test.cc b/tensorflow/core/platform/logging_test.cc index c82dc1b5fdb..f395f6419d1 100644 --- a/tensorflow/core/platform/logging_test.cc +++ b/tensorflow/core/platform/logging_test.cc @@ -88,4 +88,10 @@ TEST(LoggingDeathTest, FailedChecks) { #endif } +TEST(InternalLogString, Basic) { + // Just make sure that this code compiles (we don't actually verify + // the output) + internal::LogString(__FILE__, __LINE__, INFO, "Hello there"); +} + } // namespace tensorflow diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index dc90c17bc04..2be35eb4553 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -97,6 +97,10 @@ message GraphOptions { // no cost model. int64 build_cost_model = 4; + // The number of steps to skip before collecting statistics for the + // cost model. + int64 build_cost_model_after = 9; + // Annotate each Node with Op output shape data, to the extent it can // be statically inferred. bool infer_shapes = 5; diff --git a/tensorflow/core/util/env_var.cc b/tensorflow/core/util/env_var.cc new file mode 100644 index 00000000000..d4e89b966ef --- /dev/null +++ b/tensorflow/core/util/env_var.cc @@ -0,0 +1,63 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/util/env_var.h"
+
+#include <stdlib.h>
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
+                          bool* value) {
+  *value = default_val;
+  const char* tf_env_var_val = getenv(env_var_name.ToString().c_str());
+  if (tf_env_var_val == nullptr) {
+    return Status::OK();
+  }
+  string str_value = str_util::Lowercase(tf_env_var_val);
+  if (str_value == "0" || str_value == "false") {
+    *value = false;
+    return Status::OK();
+  } else if (str_value == "1" || str_value == "true") {
+    *value = true;
+    return Status::OK();
+  }
+  return errors::InvalidArgument(strings::StrCat(
+      "Failed to parse the env-var ${", env_var_name, "} into bool: ",
+      tf_env_var_val, ". Use the default value: ", default_val));
+}
+
+Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
+                           int64* value) {
+  *value = default_val;
+  const char* tf_env_var_val = getenv(env_var_name.ToString().c_str());
+  if (tf_env_var_val == nullptr) {
+    return Status::OK();
+  }
+  if (strings::safe_strto64(tf_env_var_val, value)) {
+    return Status::OK();
+  }
+  return errors::InvalidArgument(strings::StrCat(
+      "Failed to parse the env-var ${", env_var_name, "} into int64: ",
+      tf_env_var_val, ". Use the default value: ", default_val));
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/util/env_var.h b/tensorflow/core/util/env_var.h
new file mode 100644
index 00000000000..ec661f1d81b
--- /dev/null
+++ b/tensorflow/core/util/env_var.h
@@ -0,0 +1,40 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_UTIL_ENV_VAR_H_
+#define TENSORFLOW_UTIL_ENV_VAR_H_
+
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+// Reads a boolean from the environment variable "env_var_name" into "value".
+// If the variable is unset, "value" is set to "default_val".
+// The string "0" or a case-insensitive "false" is interpreted as false.
+// The string "1" or a case-insensitive "true" is interpreted as true.
+// Otherwise, an error status is returned.
+Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
+                          bool* value);
+
+// Reads an int64 from the environment variable "env_var_name" into "value".
+// If the variable is unset, "value" is set to "default_val".
+// If the string cannot be parsed into int64, an error status is returned.
+Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
+                           int64* value);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_UTIL_ENV_VAR_H_
diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD
index 493de9721ce..d6db84277f7 100644
--- a/tensorflow/core/util/tensor_bundle/BUILD
+++ b/tensorflow/core/util/tensor_bundle/BUILD
@@ -15,6 +15,8 @@ load("//tensorflow:tensorflow.bzl", "tf_copts")
 filegroup(
     name = "android_srcs",
     srcs = [
+        "naming.cc",
+        "naming.h",
         "tensor_bundle.cc",
         "tensor_bundle.h",
     ],
@@ -26,6 +28,7 @@ cc_library(
     hdrs = ["tensor_bundle.h"],
     copts = tf_copts(),
     deps = [
+        ":naming",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:framework_headers_lib",
@@ -37,6 +40,13 @@ cc_library(
     ],
 )
+cc_library(
+    name = "naming",
+    srcs = ["naming.cc"],
+    hdrs = ["naming.h"],
+    deps = ["//tensorflow/core:lib"],
+)
+
 cc_test(
     name = "tensor_bundle_test",
     srcs = ["tensor_bundle_test.cc"],
diff --git a/tensorflow/core/util/tensor_bundle/naming.cc b/tensorflow/core/util/tensor_bundle/naming.cc
new file mode 100644
index 00000000000..db3d7ec3acc
--- /dev/null
+++ b/tensorflow/core/util/tensor_bundle/naming.cc
@@ -0,0 +1,36 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/tensor_bundle/naming.h"
+
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+string MetaFilename(StringPiece prefix) {
+  return strings::Printf("%.*s.index", static_cast<int>(prefix.size()),
+                         prefix.data());
+}
+
+string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards) {
+  DCHECK_GT(num_shards, 0);
+  DCHECK_LT(shard_id, num_shards);
+  return strings::Printf("%.*s.data-%05d-of-%05d",
+                         static_cast<int>(prefix.size()), prefix.data(),
+                         shard_id, num_shards);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h
new file mode 100644
index 00000000000..3d21570c742
--- /dev/null
+++ b/tensorflow/core/util/tensor_bundle/naming.h
@@ -0,0 +1,46 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// A minimal library exposing the naming logic used in tensor_bundle.
+// +// A tensor bundle contains a metadata file and sharded data files, which all +// share a common pathname prefix. +// +// Given the prefix, the actual pathnames of the files can be queried via: +// +// MetaFilename(prefix): pathname of the metadata file. +// DataFilename(prefix, shard_id, num_shards): pathname of a data file. +// +// Typical usage includes forming a filepattern to match files on disk: +// +// // To find the unique metadata file. +// const string metadata_file = MetaFilename("/fs/train/ckpt-step"); +// Env::Default()->GetMatchingFiles(metadata_file, &path); +// +// Regexp can also be used: e.g. R".data-\d{5}-of-\d{5}" for data files. + +#ifndef TENSORFLOW_UTIL_TENSOR_BUNDLE_NAMING_H_ +#define TENSORFLOW_UTIL_TENSOR_BUNDLE_NAMING_H_ + +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace tensorflow { + +string MetaFilename(StringPiece prefix); +string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards); + +} // namespace tensorflow + +#endif // TENSORFLOW_UTIL_TENSOR_BUNDLE_NAMING_H_ diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index 27677b57476..61a69a3840f 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -239,19 +239,6 @@ bool IsFullSlice(const TensorSlice& slice_spec, } // namespace -string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards) { - DCHECK_GT(num_shards, 0); - DCHECK_LT(shard_id, num_shards); - return strings::Printf("%.*s.data-%05d-of-%05d", - static_cast(prefix.size()), prefix.data(), - shard_id, num_shards); -} - -string MetaFilename(StringPiece prefix) { - return strings::Printf("%.*s.index", static_cast(prefix.size()), - prefix.data()); -} - BundleWriter::BundleWriter(Env* env, StringPiece prefix) : env_(env), prefix_(prefix.ToString()), out_(nullptr), size_(0) { status_ = diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h index b5ca97b5117..46f6749ed89 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h @@ -78,6 +78,7 @@ limitations under the License. #include "tensorflow/core/platform/file_system.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/tensor_bundle/naming.h" #include "tensorflow/core/util/tensor_slice_set.h" namespace tensorflow { @@ -309,11 +310,6 @@ class FileOutputBuffer { uint32 crc32c_ = 0; }; -// Pattern: ".data--of-". -string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards); -// Pattern: ".index." -string MetaFilename(StringPiece prefix); - } // namespace tensorflow #endif // TENSORFLOW_UTIL_TENSOR_BUNDLE_TENSOR_BUNDLE_H_ diff --git a/tensorflow/core/util/use_cudnn.cc b/tensorflow/core/util/use_cudnn.cc index 7e720fdc600..47f57c909db 100644 --- a/tensorflow/core/util/use_cudnn.cc +++ b/tensorflow/core/util/use_cudnn.cc @@ -15,35 +15,39 @@ limitations under the License. 
#include "tensorflow/core/util/use_cudnn.h" -#include - #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/env_var.h" namespace tensorflow { -static bool ReadBoolFromEnvVar(const char* env_var_name, bool default_val) { - const char* tf_env_var_val = getenv(env_var_name); - if (tf_env_var_val != nullptr) { - StringPiece tf_env_var_val_str(tf_env_var_val); - if (tf_env_var_val_str == "0") { - return false; - } - return true; +bool CanUseCudnn() { + bool value; + Status status = ReadBoolFromEnvVar("TF_USE_CUDNN", true, &value); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); } - return default_val; + return value; } -bool CanUseCudnn() { return ReadBoolFromEnvVar("TF_USE_CUDNN", true); } - bool CudnnUseAutotune() { - return ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", true); + bool value; + Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", true, &value); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); + } + return value; } namespace internal { bool AvgPoolUseCudnn() { - return ReadBoolFromEnvVar("TF_AVGPOOL_USE_CUDNN", false); + bool value; + Status status = ReadBoolFromEnvVar("TF_AVGPOOL_USE_CUDNN", false, &value); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); + } + return value; } } // namespace internal diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowClassifier.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowClassifier.java deleted file mode 100644 index 4a96df854ac..00000000000 --- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowClassifier.java +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -package org.tensorflow.demo; - -import android.content.res.AssetManager; -import android.graphics.Bitmap; -import android.os.Trace; -import android.util.Log; - -import java.util.ArrayList; -import java.util.List; -import java.util.StringTokenizer; - -/** - * JNI wrapper class for the Tensorflow native code. - */ -public class TensorFlowClassifier implements Classifier { - private static final String TAG = "TensorflowClassifier"; - - // jni native methods. - public native int initializeTensorFlow( - AssetManager assetManager, - String model, - String labels, - int numClasses, - int inputSize, - int imageMean, - float imageStd, - String inputName, - String outputName); - - private native String classifyImageBmp(Bitmap bitmap); - - private native String classifyImageRgb(int[] output, int width, int height); - - static { - System.loadLibrary("tensorflow_demo"); - } - - @Override - public List recognizeImage(final Bitmap bitmap) { - // Log this method so that it can be analyzed with systrace. 
- Trace.beginSection("Recognize"); - final ArrayList recognitions = new ArrayList(); - for (final String result : classifyImageBmp(bitmap).split("\n")) { - Log.i(TAG, "Parsing [" + result + "]"); - - // Clean up the string as needed - final StringTokenizer st = new StringTokenizer(result); - if (!st.hasMoreTokens()) { - continue; - } - - final String id = st.nextToken(); - final String confidenceString = st.nextToken(); - final float confidence = Float.parseFloat(confidenceString); - - final String title = - result.substring(id.length() + confidenceString.length() + 2, result.length()); - - if (!title.isEmpty()) { - recognitions.add(new Recognition(id, title, confidence, null)); - } - } - Trace.endSection(); - return recognitions; - } - - @Override - public void close() {} -} diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD index 60fd433a206..a8ec5d4c838 100644 --- a/tensorflow/examples/tutorials/mnist/BUILD +++ b/tensorflow/examples/tutorials/mnist/BUILD @@ -88,6 +88,7 @@ py_test( ], main = "fully_connected_feed.py", srcs_version = "PY2AND3", + tags = ["noasan"], # http://b/31795146 deps = [ ":input_data", ":mnist", @@ -108,7 +109,10 @@ py_test( ], main = "mnist_with_summaries.py", srcs_version = "PY2AND3", - tags = ["notsan"], # http://b/29184009 + tags = [ + "noasan", # http://b/31795146 + "notsan", # http://b/29184009 + ], deps = [ ":input_data", "//tensorflow:tensorflow_py", diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py index 1791f97a06d..785ef5767df 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py +++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py @@ -46,7 +46,7 @@ def main(_): # The raw formulation of cross-entropy, # - # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)), + # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)), # reduction_indices=[1])) # # can be numerically unstable. diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md new file mode 100644 index 00000000000..226c06c0696 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md @@ -0,0 +1,1322 @@ + + +# Random variable transformations (contrib) +[TOC] + +Bijector Ops. + +An API for reversible (bijective) transformations of random variables. + +## Background + +Differentiable, bijective transformations of continuous random variables alter +the calculations made in the cumulative/probability distribution functions and +sample function. This module provides a standard interface for making these +manipulations. + +For more details and examples, see the `Bijector` docstring. + +To apply a `Bijector`, use `distributions.TransformedDistribution`. + +## Bijectors + +- - - + +### `class tf.contrib.distributions.bijector.Bijector` {#Bijector} + +Interface for transforming a `Distribution` via `TransformedDistribution`. + +A `Bijector` implements a bijective, differentiable function by transforming +an input `Tensor`. The output `Tensor` shape is constrained by the input +`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three +operations: + +1. Forward Evaluation + + Useful for turning one random outcome into another random outcome from a + different distribution. + +2. Inverse Evaluation + + Useful for "reversing" a transformation to compute one probability in + terms of another. + +3. 
(log o det o Jacobian o inverse)(x) + + "The log of the determinant of the matrix of all first-order partial + derivatives of the inverse function." + Useful for inverting a transformation to compute one probability in terms + of another. Geometrically, the det(Jacobian) is the volume of the + transformation and is used to scale the probability. + +By convention, transformations of random variables are named in terms of the +forward transformation. The forward transformation creates samples, the +inverse is useful for computing probabilities. + +Example Use: + + - Basic properties: + + ```python + x = ... # A tensor. + # Evaluate forward transformation. + fwd_x = my_bijector.forward(x) + x == my_bijector.inverse(fwd_x) + x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). + ``` + + - Computing a log-likelihood: + + ```python + def transformed_log_pdf(bijector, log_pdf, x): + return (bijector.inverse_log_det_jacobian(x) + + log_pdf(bijector.inverse(x))) + ``` + + - Transforming a random outcome: + + ```python + def transformed_sample(bijector, x): + return bijector.forward(x) + ``` + +Example transformations: + + - "Exponential" + + ``` + Y = g(X) = exp(X) + X ~ Normal(0, 1) # Univariate. + ``` + + Implies: + + ``` + g^{-1}(Y) = log(Y) + |Jacobian(g^{-1})(y)| = 1 / y + Y ~ LogNormal(0, 1), i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = (1 / y) Normal(log(y); 0, 1) + ``` + + - "ScaleAndShift" + + ``` + Y = g(X) = sqrtSigma * X + mu + X ~ MultivariateNormal(0, I_d) + ``` + + Implies: + + ``` + g^{-1}(Y) = inv(sqrtSigma) * (Y - mu) + |Jacobian(g^{-1})(y)| = det(inv(sqrtSigma)) + Y ~ MultivariateNormal(mu, sqrtSigma) , i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = det(sqrtSigma)^(-d) * + MultivariateNormal(inv(sqrtSigma) * (y - mu); 0, I_d) + ``` + +Example of why a `Bijector` needs to understand sample, batch, event +partitioning: + +- Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, + and event (S, B, E) shape semantics. Suppose + the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. + + For `Exp`, the shape of the `Tensor` returned by `forward` and `inverse` is + unchanged, i.e., `[4, 2, 3, 3]`. However the shape returned by + `inverse_log_det_jacobian` is `[4, 2]` because the Jacobian is a reduction + over the event dimensions. + +Subclass Requirements: + +- Subclasses are expected to implement `_forward` and one or both of: + - `_inverse`, `_inverse_log_det_jacobian`, + - `_inverse_and_inverse_log_det_jacobian`. + +- If computation can be shared among `_inverse` and + `_inverse_log_det_jacobian` it is preferable to implement + `_inverse_and_inverse_log_det_jacobian`. This usually reduces + graph-construction overhead because a `Distribution`'s implementation of + `log_prob` will need to evaluate both the inverse Jacobian as well as the + inverse function. + +- If an additional use case needs just `inverse` or just + `inverse_log_det_jacobian` then he or she may also wish to implement these + functions to avoid computing the `inverse_log_det_jacobian` or the + `inverse`, respectively. +- - - + +#### `tf.contrib.distributions.bijector.Bijector.__init__(batch_ndims=None, event_ndims=None, parameters=None, is_constant_jacobian=False, validate_args=False, dtype=None, name=None)` {#Bijector.__init__} + +Constructs Bijector. + +A `Bijector` transforms random variables into new random variables. + +Examples: + +```python +# Create the Y = g(X) = X transform which operates on 4-Tensors of vectors. 
+identity = Identity(batch_ndims=4, event_ndims=1) + +# Create the Y = g(X) = exp(X) transform which operates on matrices. +exp = Exp(batch_ndims=0, event_ndims=2) +``` + +See `Bijector` subclass docstring for more details and specific examples. + +##### Args: + + +* `batch_ndims`: number of dimensions associated with batch coordinates. +* `event_ndims`: number of dimensions associated with event coordinates. +* `parameters`: Dictionary of parameters used by this `Bijector` +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is not a + function of the input. +* `validate_args`: `Boolean`, default `False`. Whether to validate input with + asserts. If `validate_args` is `False`, and the inputs are invalid, + correct behavior is not guaranteed. +* `dtype`: `tf.dtype` supported by this `Bijector`. `None` means dtype is not + enforced. +* `name`: The name to give Ops created by the initializer. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.dtype` {#Bijector.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.forward(x, name='forward')` {#Bijector.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse(x, name='inverse')` {#Bijector.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Bijector.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Bijector.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. 
+ +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.is_constant_jacobian` {#Bijector.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.name` {#Bijector.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.parameters` {#Bijector.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.shaper` {#Bijector.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.validate_args` {#Bijector.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.Identity` {#Identity} + +Bijector which computes Y = g(X) = X. + +Example Use: + +```python +# Create the Y=g(X)=X transform which is intended for Tensors with 1 batch +# ndim and 1 event ndim (i.e., vector of vectors). +identity = Identity(batch_ndims=1, event_ndims=1) +x = [[1., 2], + [3, 4]] +x == identity.forward(x) == identity.inverse(x) +``` +- - - + +#### `tf.contrib.distributions.bijector.Identity.__init__(validate_args=False, name='Identity')` {#Identity.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.dtype` {#Identity.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.forward(x, name='forward')` {#Identity.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse(x, name='inverse')` {#Identity.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Identity.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. 
+* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Identity.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.is_constant_jacobian` {#Identity.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.name` {#Identity.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.parameters` {#Identity.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.shaper` {#Identity.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.validate_args` {#Identity.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.Inline` {#Inline} + +Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian. + +Example Use: + +```python +exp = Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="Exp") +``` + +The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. +- - - + +#### `tf.contrib.distributions.bijector.Inline.__init__(forward_fn, inverse_fn, inverse_log_det_jacobian_fn, is_constant_jacobian=False, name='Inline')` {#Inline.__init__} + +Creates a `Bijector` from callables. + +##### Args: + + +* `forward_fn`: Python callable implementing the forward transformation. +* `inverse_fn`: Python callable implementing the inverse transformation. +* `inverse_log_det_jacobian_fn`: Python callable implementing the + inverse_log_det_jacobian transformation. +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is constant + for all input arguments. +* `name`: `String`, name given to ops managed by this object. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.dtype` {#Inline.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.forward(x, name='forward')` {#Inline.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. 
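+
+For illustration, a possible round trip through the `exp` `Inline` bijector
+constructed in the class example above (the tensor values here are arbitrary
+and only meant as a sketch):
+
+```python
+x = tf.constant([[1., 2.], [3., 4.]])
+y = exp.forward(x)       # element-wise tf.exp(x)
+x_back = exp.inverse(y)  # element-wise tf.log(y); recovers x
+```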
+ + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse(x, name='inverse')` {#Inline.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Inline.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Inline.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.is_constant_jacobian` {#Inline.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.name` {#Inline.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.parameters` {#Inline.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.shaper` {#Inline.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.validate_args` {#Inline.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.Exp` {#Exp} + +Bijector which computes Y = g(X) = exp(X). + +Example Use: + +```python +# Create the Y=g(X)=exp(X) transform which works only on Tensors with 1 +# batch ndim and 2 event ndims (i.e., vector of matrices). +exp = Exp(batch_ndims=1, event_ndims=2) +x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] +exp(x) == exp.forward(x) +log(x) == exp.inverse(x) +``` + +Note: the exp(.) is applied element-wise but the Jacobian is a reduction +over the event space. 
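+
+As a sketch of that reduction (values are arbitrary), with `event_ndims=1` the
+Jacobian term collapses the last (event) dimension, matching the `Inline`
+equivalence shown earlier:
+
+```python
+bij = Exp(event_ndims=1)
+y = tf.constant([[1., 2.], [3., 4.]])
+# inverse_log_det_jacobian(y) = -sum(log(y)) over the event dimension,
+# so the result has one value per batch member (shape [2] here).
+ildj = bij.inverse_log_det_jacobian(y)
+```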
+- - - + +#### `tf.contrib.distributions.bijector.Exp.__init__(event_ndims=0, validate_args=False, name='Exp')` {#Exp.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.dtype` {#Exp.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.forward(x, name='forward')` {#Exp.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse(x, name='inverse')` {#Exp.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Exp.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Exp.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.is_constant_jacobian` {#Exp.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.name` {#Exp.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.parameters` {#Exp.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.shaper` {#Exp.shaper} + +Returns shape object used to manage shape constraints. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Exp.validate_args` {#Exp.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.ScaleAndShift` {#ScaleAndShift} + +Bijector which computes Y = g(X; loc, scale) = scale * X + loc. + +Example Use: + +```python +# No batch, scalar. +mu = 0 # shape=[] +sigma = 1 # shape=[] +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 0 + +# One batch, scalar. +mu = ... # shape=[b], b>0 +sigma = ... # shape=[b], b>0 +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 1 +# b.shaper.event_ndims == 0 + +# No batch, multivariate. +mu = ... # shape=[d], d>0 +sigma = ... # shape=[d, d], d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 1 + +# (B1*B2*...*Bb)-batch, multivariate. +mu = ... # shape=[B1,...,Bb, d], b>0, d>0 +sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == b +# b.shaper.event_ndims == 1 + +# Mu is broadcast: +mu = 1 +sigma = [I, I] # I is a 3x3 identity matrix. +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +x = numpy.ones(S + sigma.shape) +b.forward(x) # == x + 1 +``` +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.__init__(loc, scale, event_ndims=0, validate_args=False, name='ScaleAndShift')` {#ScaleAndShift.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.dtype` {#ScaleAndShift.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.forward(x, name='forward')` {#ScaleAndShift.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse(x, name='inverse')` {#ScaleAndShift.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#ScaleAndShift.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
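+
+A small sketch of the scalar case from the class docstring (values are
+arbitrary); `forward` applies `scale * X + loc` and `inverse` undoes it:
+
+```python
+b = ScaleAndShift(loc=1., scale=2.)  # batch_ndims == 0, event_ndims == 0
+x = tf.constant([0., 1., 2.])
+y = b.forward(x)       # 2 * x + 1
+x_back = b.inverse(y)  # (y - 1) / 2
+```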
+ + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#ScaleAndShift.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.is_constant_jacobian` {#ScaleAndShift.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.loc` {#ScaleAndShift.loc} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.name` {#ScaleAndShift.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.parameters` {#ScaleAndShift.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.scale` {#ScaleAndShift.scale} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.shaper` {#ScaleAndShift.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.validate_args` {#ScaleAndShift.validate_args} + +Returns True if Tensor arguments will be validated. + + + + +## Other Functions and Classes +- - - + +### `class tf.contrib.distributions.bijector.Softplus` {#Softplus} + +Bijector which computes `Y = g(X) = Log[1 + exp(X)]`. + +The softplus `Bijector` has the following two useful properties: + +* The domain is the positive real numbers +* `softplus(x) approx x`, for large `x`, so it does not overflow as easily as + the `Exp` `Bijector`. + + Example Use: + + ```python + # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 + # batch ndim and 2 event ndims (i.e., vector of matrices). + softplus = Softplus(batch_ndims=1, event_ndims=2) + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + log(1 + exp(x)) == softplus.forward(x) + log(exp(x) - 1) == softplus.inverse(x) + ``` + + Note: log(.) and exp(.) are applied element-wise but the Jacobian is a + reduction over the event space. +- - - + +#### `tf.contrib.distributions.bijector.Softplus.__init__(event_ndims=0, validate_args=False, name='Softplus')` {#Softplus.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.dtype` {#Softplus.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.forward(x, name='forward')` {#Softplus.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. 
+* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse(x, name='inverse')` {#Softplus.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Softplus.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Softplus.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.is_constant_jacobian` {#Softplus.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.name` {#Softplus.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.parameters` {#Softplus.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.shaper` {#Softplus.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.validate_args` {#Softplus.validate_args} + +Returns True if Tensor arguments will be validated. + + + diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md index 0e8a2a798c0..7c059a7de41 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md +++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md @@ -727,10 +727,12 @@ Initialize a batch of Binomial distributions. 
* `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm]` `m >= 0`, and the same dtype as `n`. Each entry represents logits for the probability - of success for independent Binomial distributions. + of success for independent Binomial distributions. Only one of + `logits` or `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm]` `m >= 0`, `p in [0, 1]`. Each entry represents the - probability of success for independent Binomial distributions. + probability of success for independent Binomial distributions. Only one + of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n`, `p`, and `x` in `prob` and `log_prob`. If `False` and inputs are invalid, correct behavior is not guaranteed. @@ -1033,7 +1035,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Binomial.logits` {#Binomial.logits} -Log-odds. +Log-odds of success. - - - @@ -1321,10 +1323,11 @@ Construct Bernoulli distributions. * `logits`: An N-D `Tensor` representing the log-odds of a positive event. Each entry in the `Tensor` parametrizes an independent Bernoulli distribution where the probability of an event - is sigmoid(logits). + is sigmoid(logits). Only one of `logits` or `p` should be passed in. * `p`: An N-D `Tensor` representing the probability of a positive event. Each entry in the `Tensor` parameterizes an independent - Bernoulli distribution. + Bernoulli distribution. Only one of `logits` or `p` should be passed + in. * `dtype`: dtype for samples. * `validate_args`: `Boolean`, default `False`. Whether to validate that `0 <= p <= 1`. If `validate_args` is `False`, and the inputs are @@ -1609,7 +1612,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Bernoulli.logits` {#Bernoulli.logits} - +Log-odds of success. - - - @@ -1641,7 +1644,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Bernoulli.p` {#Bernoulli.p} - +Probability of success. - - - @@ -2142,7 +2145,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.logits` {#BernoulliWithSigmoidP.logits} - +Log-odds of success. - - - @@ -2174,7 +2177,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.p` {#BernoulliWithSigmoidP.p} - +Probability of success. - - - @@ -3596,9 +3599,45 @@ Categorical distribution. The categorical distribution is parameterized by the log-probabilities of a set of classes. + +#### Examples + +Creates a 3-class distribution, with the 2nd class the most likely to be +drawn from. + +```python +p = [0.1, 0.5, 0.4] +dist = Categorical(p=p) +``` + +Creates a 3-class distribution, with the 2nd class the most likely to be +drawn from, using logits. + +```python +logits = [-50, 400, 40] +dist = Categorical(logits=logits) +``` + +Creates a 3-class distribution, with the 3rd class the most likely to be drawn. +The distribution functions can be evaluated on counts. + +```python +# counts is a scalar. +p = [0.1, 0.4, 0.5] +dist = Categorical(p=p) +dist.pmf(0) # Shape [] + +# p will be broadcast to [[0.1, 0.4, 0.5], [0.1, 0.4, 0.5]] to match counts. +counts = [1, 0] +dist.pmf(counts) # Shape [2] + +# p will be broadcast to shape [3, 5, 7, 3] to match counts.
+counts = [[...]] # Shape [5, 7, 3] +dist.pmf(counts) # Shape [5, 7, 3] +``` - - - -#### `tf.contrib.distributions.Categorical.__init__(logits, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} +#### `tf.contrib.distributions.Categorical.__init__(logits=None, p=None, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} Initialize Categorical distributions using class log-probabilities. @@ -3608,7 +3647,13 @@ Initialize Categorical distributions using class log-probabilities. * `logits`: An N-D `Tensor`, `N >= 1`, representing the log probabilities of a set of Categorical distributions. The first `N - 1` dimensions index into a batch of independent distributions and the last dimension - indexes into the classes. + represents a vector of logits for each class. Only one of `logits` or + `p` should be passed in. +* `p`: An N-D `Tensor`, `N >= 1`, representing the probabilities + of a set of Categorical distributions. The first `N - 1` dimensions + index into a batch of independent distributions and the last dimension + represents a vector of probabilities for each class. Only one of + `logits` or `p` should be passed in. * `dtype`: The type of the event samples (default: int32). * `validate_args`: Unused in this distribution. * `allow_nan_stats`: `Boolean`, default `True`. If `False`, raise an @@ -3886,7 +3931,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Categorical.logits` {#Categorical.logits} - +Vector of coordinatewise logits. - - - @@ -3917,6 +3962,15 @@ Name prepended to all ops created by this `Distribution`. Scalar `int32` tensor: the number of classes. +- - - + +#### `tf.contrib.distributions.Categorical.p` {#Categorical.p} + +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. + + - - - #### `tf.contrib.distributions.Categorical.param_shapes(cls, sample_shape, name='DistributionParamShapes')` {#Categorical.param_shapes} @@ -17730,12 +17784,13 @@ Initialize a batch of Multinomial distributions. * `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm, k], m >= 0`, and the same dtype as `n`. Defines this as a batch of `N1 x ... x Nm` - different `k` class Multinomial distributions. + different `k` class Multinomial distributions. Only one of `logits` or + `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm, k]` `m >= 0` and same dtype as `n`. Defines this as a batch of `N1 x ... x Nm` different `k` class Multinomial distributions. `p`'s components in the last portion of its shape should - sum up to 1. + sum up to 1. Only one of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n` and `p`, and `x` in `prob` and `log_prob`. If `False`, correct behavior is not guaranteed. @@ -18041,7 +18096,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Multinomial.logits` {#Multinomial.logits} -Log-odds. +Vector of coordinatewise logits. - - - @@ -18076,7 +18131,9 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Multinomial.p` {#Multinomial.p} -Event probabilities. +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. 
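Several of the constructors above now accept either `logits` or `p`, but not both. As a quick illustrative sketch (the values and variable names are hypothetical, and it assumes the `tf.contrib.distributions` API as documented in this diff), the two parameterizations below describe the same Bernoulli distribution:

```python
import tensorflow as tf

ds = tf.contrib.distributions

# p = sigmoid(logits), i.e. logits = log(p) - log(1 - p).
b_from_p = ds.Bernoulli(p=0.3)
b_from_logits = ds.Bernoulli(logits=tf.log(0.3) - tf.log(0.7))

# Supplying both parameterizations at once is expected to raise an error:
# ds.Bernoulli(p=0.3, logits=0.)  # ValueError
```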
- - - @@ -19582,66 +19639,114 @@ Variance. A Transformed Distribution. -A Transformed Distribution models `p(y)` given a base distribution `p(x)`, -an invertible transform, `y = f(x)`, and the determinant of the Jacobian of -`f(x)`. +A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and +a deterministic, invertible, differentiable transform, `Y = g(X)`. The +transform is typically an instance of the `Bijector` class and the base +distribution is typically an instance of the `Distribution` class. Shapes, type, and reparameterization are taken from the base distribution. -#### Mathematical details +Write `P(Y=y)` for the cumulative distribution function of random variable (rv) `Y` and +`p` for its derivative with respect to `Y`. Assume that `Y=g(X)` where `g` is +continuous and `X=g^{-1}(Y)`. Write `J` for the Jacobian (of some function). -* `p(x)` - probability distribution for random variable X -* `p(y)` - probability distribution for random variable Y -* `f` - transform -* `g` - inverse transform, `g(f(x)) = x` -* `J(x)` - Jacobian of f(x) +A `TransformedDistribution` alters the inputs/outputs of a `Distribution` +associated with rv `X` in the following ways: -A Transformed Distribution exposes `sample` and `pdf`: + * `sample`: - * `sample`: `y = f(x)`, after drawing a sample of X. - * `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|` + Mathematically: + + ``` + Y = g(X) + ``` + + Programmatically: + + ```python + return bijector.forward(distribution.sample(...)) + ``` + + * `log_prob`: + + Mathematically: + + ``` + (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y) + ``` + + Programmatically: + + ```python + return (bijector.inverse_log_det_jacobian(x) + + distribution.log_prob(bijector.inverse(x))) + ``` + + * `log_cdf`: + + Mathematically: + + ``` + (log o P o g^{-1})(y) + ``` + + Programmatically: + + ```python + return distribution.log_cdf(bijector.inverse(x)) + ``` + + * and similarly for: `cdf`, `prob`, `log_survival_function`, + `survival_function`. A simple example constructing a Log-Normal distribution from a Normal distribution: ```python -logit_normal = TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.sigmoid(x), - inverse=lambda y: tf.log(y) - tf.log(1. - y), - log_det_jacobian=(lambda x: - tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1.
- tf.sigmoid(x)), - reduction_indices=[-1]))) - name="LogitNormalTransformedDistribution" -) +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Exp(), + name="LogNormalTransformedDistribution") +``` + +A `LogNormal` made from callables: + +```python +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="LogNormalTransformedDistribution")) +``` + +Another example constructing a Normal from a StandardNormal: + +```python +ds = tf.contrib.distributions +normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=0, sigma=1), + bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0), + name="NormalTransformedDistribution") ``` - - - -#### `tf.contrib.distributions.TransformedDistribution.__init__(base_dist_cls, transform, inverse, log_det_jacobian, name='TransformedDistribution', **base_dist_args)` {#TransformedDistribution.__init__} +#### `tf.contrib.distributions.TransformedDistribution.__init__(base_distribution, bijector, name='TransformedDistribution')` {#TransformedDistribution.__init__} Construct a Transformed Distribution. ##### Args: -* `base_dist_cls`: the base distribution class to transform. Must be a - subclass of `Distribution`. -* `transform`: a callable that takes a `Tensor` sample from `base_dist` and - returns a `Tensor` of the same shape and type. `x => y`. -* `inverse`: a callable that computes the inverse of transform. `y => x`. If - None, users can only call `log_pdf` on values returned by `sample`. -* `log_det_jacobian`: a callable that takes a `Tensor` sample from `base_dist` - and returns the log of the determinant of the Jacobian of `transform`. +* `base_distribution`: The base distribution to transform. Typically an + instance of `Distribution`. +* `bijector`: The object responsible for calculating the transformation. + Typically an instance of `Bijector`. * `name`: The name for the distribution. -* `**base_dist_args`: kwargs to pass on to dist_cls on construction. - -##### Raises: - - -* `TypeError`: if `base_dist_cls` is not a subclass of - `Distribution`. - - - @@ -19692,6 +19797,13 @@ independent distributions of this kind the instance represents. * `batch_shape`: `Tensor`. +- - - + +#### `tf.contrib.distributions.TransformedDistribution.bijector` {#TransformedDistribution.bijector} + +Function transforming x => y. + + - - - #### `tf.contrib.distributions.TransformedDistribution.cdf(value, name='cdf')` {#TransformedDistribution.cdf} @@ -19776,13 +19888,6 @@ Same meaning as `event_shape`. May be only partially defined. * `event_shape`: `TensorShape`, possibly unknown. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.inverse` {#TransformedDistribution.inverse} - -Inverse function of transform, y => x. - - - - - #### `tf.contrib.distributions.TransformedDistribution.is_continuous` {#TransformedDistribution.is_continuous} @@ -19826,13 +19931,6 @@ a more accurate answer than simply taking the logarithm of the `cdf` when values of type `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.log_det_jacobian` {#TransformedDistribution.log_det_jacobian} - -Function computing the log determinant of the Jacobian of transform.
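To make the sample/log_prob delegation described above concrete, here is a small usage sketch built from the `Exp` example; it is illustrative only and assumes `sample_n` and `log_prob` behave as documented for `Distribution` elsewhere in this diff:

```python
import tensorflow as tf

ds = tf.contrib.distributions
log_normal = ds.TransformedDistribution(
    base_distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(),
    name="LogNormal")

y = log_normal.sample_n(4)      # bijector.forward applied to base samples
log_p = log_normal.log_prob(y)  # base log_prob(inverse(y)) + inverse_log_det_jacobian(y)
```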
- - - - - #### `tf.contrib.distributions.TransformedDistribution.log_pdf(value, name='log_pdf')` {#TransformedDistribution.log_pdf} @@ -19890,8 +19988,8 @@ Log probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `(log o p o g)(y) - (log o det o J o g)(y)`, -where `g` is the inverse of `transform`. +Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, +where `g^{-1}` is the inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -20065,8 +20163,8 @@ Probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of -`transform`. +Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the +inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -20116,7 +20214,7 @@ Generate `n` samples. Additional documentation from `TransformedDistribution`: Samples from the base distribution and then passes through -the transform. +the bijector's forward transform. ##### Args: @@ -20170,13 +20268,6 @@ survival_function(x) = P[X > x] `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.transform` {#TransformedDistribution.transform} - -Function transforming x => y. - - - - - #### `tf.contrib.distributions.TransformedDistribution.validate_args` {#TransformedDistribution.validate_args} diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.md b/tensorflow/g3doc/api_docs/python/contrib.learn.md index 1d647aea583..f05cdbab6cb 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.learn.md +++ b/tensorflow/g3doc/api_docs/python/contrib.learn.md @@ -295,8 +295,15 @@ Constructs an Estimator instance. * `model_fn`: Model function, takes features and targets tensors or dicts of - tensors and returns predictions and loss tensors. - Supports next three signatures for the function: + tensors and returns tuple of: + + * predictions: `Tensor`, `SparseTensor` or dictionary of same. + Can also be any type that is convertible to a `Tensor` or + `SparseTensor`, or dictionary of same. + * loss: Scalar loss `Tensor`. + * train_op: Training update `Tensor` or `Operation`. + + Supports next three signatures for the function: * `(features, targets) -> (predictions, loss, train_op)` * `(features, targets, mode) -> (predictions, loss, train_op)` diff --git a/tensorflow/g3doc/api_docs/python/contrib.losses.md b/tensorflow/g3doc/api_docs/python/contrib.losses.md index 2398f5e8f1b..60a50a4b5e2 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.losses.md +++ b/tensorflow/g3doc/api_docs/python/contrib.losses.md @@ -228,82 +228,84 @@ measurable element of `predictions` is scaled by the corresponding value of - - - -### `tf.contrib.losses.mean_pairwise_squared_error(*args, **kwargs)` {#mean_pairwise_squared_error} +### `tf.contrib.losses.mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_pairwise_squared_error} -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) +Adds a pairwise-errors-squared loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. 
+Unlike `mean_squared_error`, which is a measure of the differences between +corresponding elements of `predictions` and `targets`, +`mean_pairwise_squared_error` is a measure of the differences between pairs of +corresponding elements of `predictions` and `targets`. - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. +For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are +three pairs of differences are summed to compute the loss: + loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 +Note that since the inputs are of size [batch_size, d0, ... dN], the +corresponding pairs are computed within each batch sample but not across +samples within a batch. For example, if `predictions` represents a batch of +16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs +is drawn from each image, but not across images. - Note that since the inputs are of size [batch_size, d0, ... dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. +`weight` acts as a coefficient for the loss. If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. +##### Args: - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs, a tensor of size [batch_size, d0, .. dN] + where N+1 is the total number of dimensions in `predictions`. +* `targets`: The ground truth output tensor, whose shape must match the shape of + the `predictions` tensor. +* `weight`: Coefficients for the loss a scalar, a tensor of shape [batch_size] + or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. 
+ +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. - - - -### `tf.contrib.losses.mean_squared_error(*args, **kwargs)` {#mean_squared_error} +### `tf.contrib.losses.mean_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_squared_error} -Adds a Sum-of-Squares loss to the training procedure. (deprecated) +Adds a Sum-of-Squares loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. +`weight` acts as a coefficient for the loss. If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. If the shape of +`weight` matches the shape of `predictions`, then the loss of each +measurable element of `predictions` is scaled by the corresponding value of +`weight`. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. +##### Args: - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs. +* `targets`: The ground truth output tensor, same dimensions as 'predictions'. +* `weight`: Coefficients for the loss a scalar, a tensor of shape + [batch_size] or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. + +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. - - - @@ -411,83 +413,3 @@ corresponding sample. if `weight` is None. -- - - - -### `tf.contrib.losses.sum_of_pairwise_squares(*args, **kwargs)` {#sum_of_pairwise_squares} - -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. - - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. - - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - - Note that since the inputs are of size [batch_size, d0, ... 
dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. - - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. - - -- - - - -### `tf.contrib.losses.sum_of_squares(*args, **kwargs)` {#sum_of_squares} - -Adds a Sum-of-Squares loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. - - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. - - diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md index 468d4f96cd4..d3fc05c3a20 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md +++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md @@ -355,7 +355,11 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k} -Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) +Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated) + +THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08. +Instructions for updating: +Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1]. SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. Instructions for updating: @@ -964,7 +968,7 @@ If `weights` is `None`, weights default to 1. 
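For readers following the deprecation note above, a rough migration sketch (purely illustrative; the tensor names are hypothetical and the exact `streaming_sparse_recall_at_k` keyword arguments are assumed from the docs elsewhere in this diff):

```python
import tensorflow as tf

# Hypothetical inputs: per-class scores and dense integer labels.
predictions = tf.random_uniform([8, 10])
labels = tf.cast(tf.random_uniform([8], maxval=10, dtype=tf.int32), tf.int64)

# Reshape dense labels from [batch_size] to [batch_size, 1], as instructed.
labels_2d = tf.reshape(labels, [-1, 1])
recall_at_5, update_op = tf.contrib.metrics.streaming_sparse_recall_at_k(
    predictions=predictions, labels=labels_2d, k=5)
```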
Use weights of 0 to mask values. [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. * `k`: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. @@ -1031,7 +1035,7 @@ Instructions for updating: [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. class_id: Integer class ID for which we want binary metrics. This should be @@ -1104,7 +1108,7 @@ Instructions for updating: labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`. + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. diff --git a/tensorflow/g3doc/api_docs/python/contrib.training.md b/tensorflow/g3doc/api_docs/python/contrib.training.md index 8b08e2b8166..9f311c5f632 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.training.md +++ b/tensorflow/g3doc/api_docs/python/contrib.training.md @@ -724,10 +724,12 @@ It should be run in a separate thread via e.g. a `QueueRunner`. ## Online data resampling To resample data with replacement on a per-example basis, use -['resample_at_rate'](#resample_at_rate), providing the desired rate -for each example. If you wish to specify relative rates, rather than -absolute ones, use ['weighted_resample'](#weighted_resample) (which -also returns the actual resampling rate used for each output example). +['rejection_sample'](#rejection_sample) or +['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide +a boolean Tensor describing whether to accept or reject. For `resample_at_rate`, +providing the desired rate for each example. If you wish to specify relative +rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample) +(which also returns the actual resampling rate used for each output example). Use ['stratified_sample'](#stratified_sample) or ['stratified_sample_unknown_dist'](#stratified_sample_unknown_dist) to @@ -737,6 +739,66 @@ have a binary classification dataset that is 99.9% class 1, a common approach is to resample from the data so that the data is more balanced. +- - - + +### `tf.contrib.training.rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1, enqueue_many=False, prebatch_capacity=16, prebatch_threads=1, runtime_checks=False, name=None)` {#rejection_sample} + +Stochastically creates batches by rejection sampling. + +Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce +a scalar tensor between 0 and 1. This tensor corresponds to the probability of +being accepted. 
When `batch_size` tensor groups have been accepted, the batch +queue will return a mini-batch. + +##### Args: + + +* `tensors`: List of tensors for data. All tensors are either one item or a + batch, according to enqueue_many. +* `accept_prob_fn`: A python lambda that takes a non-batch tensor from each + item in `tensors`, and produces a scalar tensor. +* `batch_size`: Size of batch to be returned. +* `queue_threads`: The number of threads for the queue that will hold the final + batch. +* `enqueue_many`: Bool. If true, interpret input tensors as having a batch + dimension. +* `prebatch_capacity`: Capacity for the large queue that is used to convert + batched tensors to single examples. +* `prebatch_threads`: Number of threads for the large queue that is used to + convert batched tensors to single examples. +* `runtime_checks`: Bool. If true, insert runtime checks on the output of + `accept_prob_fn`. Using `True` might have a performance impact. +* `name`: Optional prefix for ops created by this function. + +##### Raises: + + +* `ValueError`: enqueue_many is True and labels doesn't have a batch + dimension, or if enqueue_many is False and labels isn't a scalar. +* `ValueError`: enqueue_many is True, and batch dimension on data and labels + don't match. +* `ValueError`: if a zero initial probability class has a nonzero target + probability. + +##### Returns: + + A list of tensors of the same length as `tensors`, with batch dimension + `batch_size`. + +##### Example: + + # Get tensor for a single data and label example. + data, label = data_provider.Get(['data', 'label']) + + # Get stratified batch according to data tensor. + accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2 + data_batch = tf.contrib.training.rejection_sample( + [data, label], accept_prob_fn, 16) + + # Run batch through network. + ... + + - - - ### `tf.contrib.training.resample_at_rate(inputs, rates, scope=None, seed=None, back_prop=False)` {#resample_at_rate} diff --git a/tensorflow/g3doc/api_docs/python/control_flow_ops.md b/tensorflow/g3doc/api_docs/python/control_flow_ops.md index dd7e4158566..70c27127e6c 100644 --- a/tensorflow/g3doc/api_docs/python/control_flow_ops.md +++ b/tensorflow/g3doc/api_docs/python/control_flow_ops.md @@ -205,9 +205,9 @@ creates the tensors to be returned if the boolean evaluates to True. in `pred_fn_pairs` as well as `default` should return the same number and types of tensors. -If `exclusive==True`, all predicates are evaluated, and a logging operation -with an error is returned if more than one of the predicates evaluates to -True. If `exclusive==False`, execution stops are the first predicate which +If `exclusive==True`, all predicates are evaluated, and an exception is +thrown if more than one of the predicates evaluates to `True`. +If `exclusive==False`, execution stops are the first predicate which evaluates to True, and the tensors generated by the corresponding function are returned immediately. If none of the predicates evaluate to True, this operation returns the tensors generated by `default`. @@ -253,7 +253,7 @@ Example 2: * `pred_fn_pairs`: Dict or list of pairs of a boolean scalar tensor and a callable which returns a list of tensors. * `default`: A callable that returns a list of tensors. -* `exclusive`: True iff more than one predicate is allowed to evaluate to True. +* `exclusive`: True iff at most one predicate is allowed to evaluate to `True`. * `name`: A name for this operation (optional). 
##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/framework.md b/tensorflow/g3doc/api_docs/python/framework.md index eacd295ef38..24ef7787597 100644 --- a/tensorflow/g3doc/api_docs/python/framework.md +++ b/tensorflow/g3doc/api_docs/python/framework.md @@ -1410,6 +1410,9 @@ if tf.constant(5) < tf.constant(7): # Will raise. # ... ``` +This disallows ambiguities between testing the Python value vs testing the +dynamic condition of the `Tensor`. + ##### Raises: `TypeError`. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md index 9314837b8ee..02bae13a15e 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md @@ -9,9 +9,9 @@ creates the tensors to be returned if the boolean evaluates to True. in `pred_fn_pairs` as well as `default` should return the same number and types of tensors. -If `exclusive==True`, all predicates are evaluated, and a logging operation -with an error is returned if more than one of the predicates evaluates to -True. If `exclusive==False`, execution stops are the first predicate which +If `exclusive==True`, all predicates are evaluated, and an exception is +thrown if more than one of the predicates evaluates to `True`. +If `exclusive==False`, execution stops are the first predicate which evaluates to True, and the tensors generated by the corresponding function are returned immediately. If none of the predicates evaluate to True, this operation returns the tensors generated by `default`. @@ -57,7 +57,7 @@ Example 2: * `pred_fn_pairs`: Dict or list of pairs of a boolean scalar tensor and a callable which returns a list of tensors. * `default`: A callable that returns a list of tensors. -* `exclusive`: True iff more than one predicate is allowed to evaluate to True. +* `exclusive`: True iff at most one predicate is allowed to evaluate to `True`. * `name`: A name for this operation (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md index e4ce4b16dd5..e9b11e4b4ee 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md @@ -14,10 +14,11 @@ Construct Bernoulli distributions. * `logits`: An N-D `Tensor` representing the log-odds of a positive event. Each entry in the `Tensor` parametrizes an independent Bernoulli distribution where the probability of an event - is sigmoid(logits). + is sigmoid(logits). Only one of `logits` or `p` should be passed in. * `p`: An N-D `Tensor` representing the probability of a positive event. Each entry in the `Tensor` parameterizes an independent - Bernoulli distribution. + Bernoulli distribution. Only one of `logits` or `p` should be passed + in. * `dtype`: dtype for samples. * `validate_args`: `Boolean`, default `False`. Whether to validate that `0 <= p <= 1`. If `validate_args` is `False`, and the inputs are @@ -302,7 +303,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Bernoulli.logits` {#Bernoulli.logits} - +Log-odds of success. 
- - - @@ -334,7 +335,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Bernoulli.p` {#Bernoulli.p} - +Probability of success. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md index 621e994691a..7360430d36b 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md @@ -289,6 +289,9 @@ if tf.constant(5) < tf.constant(7): # Will raise. # ... ``` +This disallows ambiguities between testing the Python value vs testing the +dynamic condition of the `Tensor`. + ##### Raises: `TypeError`. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md index 4b141b4fca9..dc761426fc9 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md @@ -72,5 +72,5 @@ tensor shape, the initializer will raise a `ValueError`. * `ValueError`: Too many elements provided. Needed at most 6, but received 8 - ``` +``` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md index e9000f01015..2ac3000749c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md @@ -1,65 +1,113 @@ A Transformed Distribution. -A Transformed Distribution models `p(y)` given a base distribution `p(x)`, -an invertible transform, `y = f(x)`, and the determinant of the Jacobian of -`f(x)`. +A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and +a deterministic, invertible, differentiable transform, `Y = g(X)`. The +transform is typically an instance of the `Bijector` class and the base +distribution is typically an instance of the `Distribution` class. Shapes, type, and reparameterization are taken from the base distribution. -#### Mathematical details +Write `P(Y=y)` for cumulative density function of random variable (rv) `Y` and +`p` for its derivative wrt to `Y`. Assume that `Y=g(X)` where `g` is +continuous and `X=g^{-1}(Y)`. Write `J` for the Jacobian (of some function). -* `p(x)` - probability distribution for random variable X -* `p(y)` - probability distribution for random variable Y -* `f` - transform -* `g` - inverse transform, `g(f(x)) = x` -* `J(x)` - Jacobian of f(x) +A `TransformedDistribution` alters the input/outputs of a `Distribution` +associated with rv `X` in the following ways: -A Transformed Distribution exposes `sample` and `pdf`: + * `sample`: - * `sample`: `y = f(x)`, after drawing a sample of X. 
- * `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|` + Mathematically: + + ``` + Y = g(X) + ``` + + Programmatically: + + ```python + return bijector.forward(distribution.sample(...)) + ``` + + * `log_prob`: + + Mathematically: + + ``` + (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y) + ``` + + Programmatically: + + ```python + return (bijector.inverse_log_det_jacobian(x) + + distribution.log_prob(bijector.inverse(x)) + ``` + + * `log_cdf`: + + Mathematically: + + ``` + (log o P o g^{-1})(y) + ``` + + Programmatically: + + ```python + return distribution.log_prob(bijector.inverse(x)) + ``` + + * and similarly for: `cdf`, `prob`, `log_survival_function`, + `survival_function`. A simple example constructing a Log-Normal distribution from a Normal distribution: ```python -logit_normal = TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.sigmoid(x), - inverse=lambda y: tf.log(y) - tf.log(1. - y), - log_det_jacobian=(lambda x: - tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1. - tf.sigmoid(x)), - reduction_indices=[-1]))) - name="LogitNormalTransformedDistribution" -) +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Exp(), + name="LogNormalTransformedDistribution") +``` + +A `LogNormal` made from callables: + +```python +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(x), reduction_indices=-1)), + name="LogNormalTransformedDistribution") +``` + +Another example constructing a Normal from a StandardNormal: + +```python +ds = tf.contrib.distributions +normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=0, sigma=1), + bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0), + name="NormalTransformedDistribution") ``` - - - -#### `tf.contrib.distributions.TransformedDistribution.__init__(base_dist_cls, transform, inverse, log_det_jacobian, name='TransformedDistribution', **base_dist_args)` {#TransformedDistribution.__init__} +#### `tf.contrib.distributions.TransformedDistribution.__init__(base_distribution, bijector, name='TransformedDistribution')` {#TransformedDistribution.__init__} Construct a Transformed Distribution. ##### Args: -* `base_dist_cls`: the base distribution class to transform. Must be a - subclass of `Distribution`. -* `transform`: a callable that takes a `Tensor` sample from `base_dist` and - returns a `Tensor` of the same shape and type. `x => y`. -* `inverse`: a callable that computes the inverse of transform. `y => x`. If - None, users can only call `log_pdf` on values returned by `sample`. -* `log_det_jacobian`: a callable that takes a `Tensor` sample from `base_dist` - and returns the log of the determinant of the Jacobian of `transform`. +* `base_distribution`: The base distribution class to transform. Typically an + instance of `Distribution`. +* `bijector`: The object responsible for calculating the transformation. + Typically an instance of `Bijector`. * `name`: The name for the distribution. -* `**base_dist_args`: kwargs to pass on to dist_cls on construction. - -##### Raises: - - -* `TypeError`: if `base_dist_cls` is not a subclass of - `Distribution`. - - - @@ -110,6 +158,13 @@ independent distributions of this kind the instance represents. 
* `batch_shape`: `Tensor`. +- - - + +#### `tf.contrib.distributions.TransformedDistribution.bijector` {#TransformedDistribution.bijector} + +Function transforming x => y. + + - - - #### `tf.contrib.distributions.TransformedDistribution.cdf(value, name='cdf')` {#TransformedDistribution.cdf} @@ -194,13 +249,6 @@ Same meaning as `event_shape`. May be only partially defined. * `event_shape`: `TensorShape`, possibly unknown. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.inverse` {#TransformedDistribution.inverse} - -Inverse function of transform, y => x. - - - - - #### `tf.contrib.distributions.TransformedDistribution.is_continuous` {#TransformedDistribution.is_continuous} @@ -244,13 +292,6 @@ a more accurate answer than simply taking the logarithm of the `cdf` when values of type `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.log_det_jacobian` {#TransformedDistribution.log_det_jacobian} - -Function computing the log determinant of the Jacobian of transform. - - - - - #### `tf.contrib.distributions.TransformedDistribution.log_pdf(value, name='log_pdf')` {#TransformedDistribution.log_pdf} @@ -308,8 +349,8 @@ Log probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `(log o p o g)(y) - (log o det o J o g)(y)`, -where `g` is the inverse of `transform`. +Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, +where `g^{-1}` is the inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -483,8 +524,8 @@ Probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of -`transform`. +Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the +inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -534,7 +575,7 @@ Generate `n` samples. Additional documentation from `TransformedDistribution`: Samples from the base distribution and then passes through -the transform. +the bijector's forward transform. ##### Args: @@ -588,13 +629,6 @@ survival_function(x) = P[X > x] `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.transform` {#TransformedDistribution.transform} - -Function transforming x => y. - - - - - #### `tf.contrib.distributions.TransformedDistribution.validate_args` {#TransformedDistribution.validate_args} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.losses.sum_of_pairwise_squares.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.losses.sum_of_pairwise_squares.md deleted file mode 100644 index 13326b052a6..00000000000 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.losses.sum_of_pairwise_squares.md +++ /dev/null @@ -1,44 +0,0 @@ -### `tf.contrib.losses.sum_of_pairwise_squares(*args, **kwargs)` {#sum_of_pairwise_squares} - -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. 
- - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. - - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - - Note that since the inputs are of size [batch_size, d0, ... dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. - - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md index ed0094534ce..1b19ba9c7b7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md @@ -39,7 +39,7 @@ Instructions for updating: labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`. + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md index acecb8e52cb..7d3f2a3a252 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md @@ -2,9 +2,45 @@ Categorical distribution. The categorical distribution is parameterized by the log-probabilities of a set of classes. + +#### Examples + +Creates a 3-class distiribution, with the 2nd class, the most likely to be +drawn from. 
+ +```python +p = [0.1, 0.5, 0.4] +dist = Categorical(p=p) +``` + +Creates a 3-class distiribution, with the 2nd class the most likely to be +drawn from, using logits. + +```python +logits = [-50, 400, 40] +dist = Categorical(logits=logits) +``` + +Creates a 3-class distribution, with the 3rd class is most likely to be drawn. +The distribution functions can be evaluated on counts. + +```python +# counts is a scalar. +p = [0.1, 0.4, 0.5] +dist = Categorical(p=p) +dist.pmf(0) # Shape [] + +# p will be broadcast to [[0.1, 0.4, 0.5], [0.1, 0.4, 0.5]] to match counts. +counts = [1, 0] +dist.pmf(counts) # Shape [2] + +# p will be broadcast to shape [3, 5, 7, 3] to match counts. +counts = [[...]] # Shape [5, 7, 3] +dist.pmf(counts) # Shape [5, 7, 3] +``` - - - -#### `tf.contrib.distributions.Categorical.__init__(logits, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} +#### `tf.contrib.distributions.Categorical.__init__(logits=None, p=None, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} Initialize Categorical distributions using class log-probabilities. @@ -14,7 +50,13 @@ Initialize Categorical distributions using class log-probabilities. * `logits`: An N-D `Tensor`, `N >= 1`, representing the log probabilities of a set of Categorical distributions. The first `N - 1` dimensions index into a batch of independent distributions and the last dimension - indexes into the classes. + represents a vector of logits for each class. Only one of `logits` or + `p` should be passed in. +* `p`: An N-D `Tensor`, `N >= 1`, representing the probabilities + of a set of Categorical distributions. The first `N - 1` dimensions + index into a batch of independent distributions and the last dimension + represents a vector of probabilities for each class. Only one of + `logits` or `p` should be passed in. * `dtype`: The type of the event samples (default: int32). * `validate_args`: Unused in this distribution. * `allow_nan_stats`: `Boolean`, default `True`. If `False`, raise an @@ -292,7 +334,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Categorical.logits` {#Categorical.logits} - +Vector of coordinatewise logits. - - - @@ -323,6 +365,15 @@ Name prepended to all ops created by this `Distribution`. Scalar `int32` tensor: the number of classes. +- - - + +#### `tf.contrib.distributions.Categorical.p` {#Categorical.p} + +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. + + - - - #### `tf.contrib.distributions.Categorical.param_shapes(cls, sample_shape, name='DistributionParamShapes')` {#Categorical.param_shapes} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md new file mode 100644 index 00000000000..be9565eb653 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md @@ -0,0 +1,315 @@ +Interface for transforming a `Distribution` via `TransformedDistribution`. + +A `Bijector` implements a bijective, differentiable function by transforming +an input `Tensor`. The output `Tensor` shape is constrained by the input +`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three +operations: + +1. 
Forward Evaluation + + Useful for turning one random outcome into another random outcome from a + different distribution. + +2. Inverse Evaluation + + Useful for "reversing" a transformation to compute one probability in + terms of another. + +3. (log o det o Jacobian o inverse)(x) + + "The log of the determinant of the matrix of all first-order partial + derivatives of the inverse function." + Useful for inverting a transformation to compute one probability in terms + of another. Geometrically, the det(Jacobian) is the volume of the + transformation and is used to scale the probability. + +By convention, transformations of random variables are named in terms of the +forward transformation. The forward transformation creates samples, the +inverse is useful for computing probabilities. + +Example Use: + + - Basic properties: + + ```python + x = ... # A tensor. + # Evaluate forward transformation. + fwd_x = my_bijector.forward(x) + x == my_bijector.inverse(fwd_x) + x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). + ``` + + - Computing a log-likelihood: + + ```python + def transformed_log_pdf(bijector, log_pdf, x): + return (bijector.inverse_log_det_jacobian(x) + + log_pdf(bijector.inverse(x))) + ``` + + - Transforming a random outcome: + + ```python + def transformed_sample(bijector, x): + return bijector.forward(x) + ``` + +Example transformations: + + - "Exponential" + + ``` + Y = g(X) = exp(X) + X ~ Normal(0, 1) # Univariate. + ``` + + Implies: + + ``` + g^{-1}(Y) = log(Y) + |Jacobian(g^{-1})(y)| = 1 / y + Y ~ LogNormal(0, 1), i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = (1 / y) Normal(log(y); 0, 1) + ``` + + - "ScaleAndShift" + + ``` + Y = g(X) = sqrtSigma * X + mu + X ~ MultivariateNormal(0, I_d) + ``` + + Implies: + + ``` + g^{-1}(Y) = inv(sqrtSigma) * (Y - mu) + |Jacobian(g^{-1})(y)| = det(inv(sqrtSigma)) + Y ~ MultivariateNormal(mu, sqrtSigma) , i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = det(sqrtSigma)^(-d) * + MultivariateNormal(inv(sqrtSigma) * (y - mu); 0, I_d) + ``` + +Example of why a `Bijector` needs to understand sample, batch, event +partitioning: + +- Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, + and event (S, B, E) shape semantics. Suppose + the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. + + For `Exp`, the shape of the `Tensor` returned by `forward` and `inverse` is + unchanged, i.e., `[4, 2, 3, 3]`. However the shape returned by + `inverse_log_det_jacobian` is `[4, 2]` because the Jacobian is a reduction + over the event dimensions. + +Subclass Requirements: + +- Subclasses are expected to implement `_forward` and one or both of: + - `_inverse`, `_inverse_log_det_jacobian`, + - `_inverse_and_inverse_log_det_jacobian`. + +- If computation can be shared among `_inverse` and + `_inverse_log_det_jacobian` it is preferable to implement + `_inverse_and_inverse_log_det_jacobian`. This usually reduces + graph-construction overhead because a `Distribution`'s implementation of + `log_prob` will need to evaluate both the inverse Jacobian as well as the + inverse function. + +- If an additional use case needs just `inverse` or just + `inverse_log_det_jacobian` then he or she may also wish to implement these + functions to avoid computing the `inverse_log_det_jacobian` or the + `inverse`, respectively. 
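The subclass requirements above are easiest to see with a small example. The following is only an illustrative sketch (the `Square` bijector is hypothetical), assuming the constructor arguments and the `_forward` / `_inverse_and_inverse_log_det_jacobian` hooks work as this new doc page describes:

```python
import tensorflow as tf
from tensorflow.contrib.distributions import bijector

class Square(bijector.Bijector):
  """Hypothetical bijector Y = g(X) = X**2, restricted to positive scalars."""

  def __init__(self, validate_args=False, name="Square"):
    super(Square, self).__init__(
        batch_ndims=0, event_ndims=0,
        validate_args=validate_args, name=name)

  def _forward(self, x):
    return tf.square(x)

  def _inverse_and_inverse_log_det_jacobian(self, y):
    x = tf.sqrt(y)
    # dX/dY = 1 / (2 * sqrt(y)), so log|det J(g^{-1})(y)| = -log(2) - 0.5 * log(y).
    ildj = -tf.log(2.) - 0.5 * tf.log(y)
    return x, ildj
```

An instance of such a subclass could then be passed as the `bijector` argument of `TransformedDistribution`, as in the examples earlier in this diff.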
+- - - + +#### `tf.contrib.distributions.bijector.Bijector.__init__(batch_ndims=None, event_ndims=None, parameters=None, is_constant_jacobian=False, validate_args=False, dtype=None, name=None)` {#Bijector.__init__} + +Constructs Bijector. + +A `Bijector` transforms random variables into new random variables. + +Examples: + +```python +# Create the Y = g(X) = X transform which operates on 4-Tensors of vectors. +identity = Identity(batch_ndims=4, event_ndims=1) + +# Create the Y = g(X) = exp(X) transform which operates on matrices. +exp = Exp(batch_ndims=0, event_ndims=2) +``` + +See `Bijector` subclass docstring for more details and specific examples. + +##### Args: + + +* `batch_ndims`: number of dimensions associated with batch coordinates. +* `event_ndims`: number of dimensions associated with event coordinates. +* `parameters`: Dictionary of parameters used by this `Bijector` +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is not a + function of the input. +* `validate_args`: `Boolean`, default `False`. Whether to validate input with + asserts. If `validate_args` is `False`, and the inputs are invalid, + correct behavior is not guaranteed. +* `dtype`: `tf.dtype` supported by this `Bijector`. `None` means dtype is not + enforced. +* `name`: The name to give Ops created by the initializer. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.dtype` {#Bijector.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.forward(x, name='forward')` {#Bijector.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse(x, name='inverse')` {#Bijector.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Bijector.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Bijector.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). 
+ +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.is_constant_jacobian` {#Bijector.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.name` {#Bijector.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.parameters` {#Bijector.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.shaper` {#Bijector.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.validate_args` {#Bijector.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md index 51bc30bc01e..3c02fd755f2 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md @@ -42,7 +42,7 @@ Instructions for updating: [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. class_id: Integer class ID for which we want binary metrics. This should be diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md index 61f16181b53..3fb19c26013 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md @@ -1,4 +1,4 @@ -### `tf.summary.tensor_summary(display_name, tensor, description='', labels=None, collections=None, name=None)` {#tensor_summary} +### `tf.summary.tensor_summary(name, tensor, summary_description=None, collections=None)` {#tensor_summary} Outputs a `Summary` protocol buffer with a serialized tensor.proto. @@ -9,19 +9,12 @@ has one summary value containing the input tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. 
Will also serve as the series name in + TensorBoard. * `tensor`: A tensor of any type and shape to serialize. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to specify how the data can be interpreted, - for example: - * `'encoding:image/jpg'` for a string tensor containing jpg images - * `'encoding:proto/X/Y/foo.proto'` for a string tensor containing Foos - * `'group:$groupName/$roleInGroup'` for a tensor that is related to - other tensors that are all in a group. (e.g. bounding boxes and images) +* `summary_description`: Optional summary_pb2.SummaryDescription() * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md index 27414dd7308..10897cfe667 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md @@ -65,10 +65,12 @@ Initialize a batch of Binomial distributions. * `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm]` `m >= 0`, and the same dtype as `n`. Each entry represents logits for the probability - of success for independent Binomial distributions. + of success for independent Binomial distributions. Only one of + `logits` or `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm]` `m >= 0`, `p in [0, 1]`. Each entry represents the - probability of success for independent Binomial distributions. + probability of success for independent Binomial distributions. Only one + of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n`, `p`, and `x` in `prob` and `log_prob`. If `False` and inputs are invalid, correct behavior is not guaranteed. @@ -371,7 +373,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Binomial.logits` {#Binomial.logits} -Log-odds. +Log-odds of success. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md index bfc40da6ceb..15e6b46e834 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md @@ -73,12 +73,13 @@ Initialize a batch of Multinomial distributions. * `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm, k], m >= 0`, and the same dtype as `n`. Defines this as a batch of `N1 x ... x Nm` - different `k` class Multinomial distributions. + different `k` class Multinomial distributions. Only one of `logits` or + `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm, k]` `m >= 0` and same dtype as `n`. Defines this as a batch of `N1 x ... 
x Nm` different `k` class Multinomial distributions. `p`'s components in the last portion of its shape should - sum up to 1. + sum up to 1. Only one of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n` and `p`, and `x` in `prob` and `log_prob`. If `False`, correct behavior is not guaranteed. @@ -384,7 +385,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Multinomial.logits` {#Multinomial.logits} -Log-odds. +Vector of coordinatewise logits. - - - @@ -419,7 +420,9 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Multinomial.p` {#Multinomial.p} -Event probabilities. +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md new file mode 100644 index 00000000000..7f1246b9646 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md @@ -0,0 +1,218 @@ +Bijector which computes Y = g(X; loc, scale) = scale * X + loc. + +Example Use: + +```python +# No batch, scalar. +mu = 0 # shape=[] +sigma = 1 # shape=[] +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 0 + +# One batch, scalar. +mu = ... # shape=[b], b>0 +sigma = ... # shape=[b], b>0 +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 1 +# b.shaper.event_ndims == 0 + +# No batch, multivariate. +mu = ... # shape=[d], d>0 +sigma = ... # shape=[d, d], d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 1 + +# (B1*B2*...*Bb)-batch, multivariate. +mu = ... # shape=[B1,...,Bb, d], b>0, d>0 +sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == b +# b.shaper.event_ndims == 1 + +# Mu is broadcast: +mu = 1 +sigma = [I, I] # I is a 3x3 identity matrix. +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +x = numpy.ones(S + sigma.shape) +b.forward(x) # == x + 1 +``` +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.__init__(loc, scale, event_ndims=0, validate_args=False, name='ScaleAndShift')` {#ScaleAndShift.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.dtype` {#ScaleAndShift.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.forward(x, name='forward')` {#ScaleAndShift.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse(x, name='inverse')` {#ScaleAndShift.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. 
+ +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#ScaleAndShift.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#ScaleAndShift.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.is_constant_jacobian` {#ScaleAndShift.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.loc` {#ScaleAndShift.loc} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.name` {#ScaleAndShift.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.parameters` {#ScaleAndShift.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.scale` {#ScaleAndShift.scale} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.shaper` {#ScaleAndShift.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.validate_args` {#ScaleAndShift.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md index 6d698594f57..99899f1421c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md @@ -9,8 +9,15 @@ Constructs an Estimator instance. 
* `model_fn`: Model function, takes features and targets tensors or dicts of - tensors and returns predictions and loss tensors. - Supports next three signatures for the function: + tensors and returns tuple of: + + * predictions: `Tensor`, `SparseTensor` or dictionary of same. + Can also be any type that is convertible to a `Tensor` or + `SparseTensor`, or dictionary of same. + * loss: Scalar loss `Tensor`. + * train_op: Training update `Tensor` or `Operation`. + + Supports next three signatures for the function: * `(features, targets) -> (predictions, loss, train_op)` * `(features, targets, mode) -> (predictions, loss, train_op)` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.rejection_sample.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.rejection_sample.md new file mode 100644 index 00000000000..fe3c9866e8d --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.rejection_sample.md @@ -0,0 +1,57 @@ +### `tf.contrib.training.rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1, enqueue_many=False, prebatch_capacity=16, prebatch_threads=1, runtime_checks=False, name=None)` {#rejection_sample} + +Stochastically creates batches by rejection sampling. + +Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce +a scalar tensor between 0 and 1. This tensor corresponds to the probability of +being accepted. When `batch_size` tensor groups have been accepted, the batch +queue will return a mini-batch. + +##### Args: + + +* `tensors`: List of tensors for data. All tensors are either one item or a + batch, according to enqueue_many. +* `accept_prob_fn`: A python lambda that takes a non-batch tensor from each + item in `tensors`, and produces a scalar tensor. +* `batch_size`: Size of batch to be returned. +* `queue_threads`: The number of threads for the queue that will hold the final + batch. +* `enqueue_many`: Bool. If true, interpret input tensors as having a batch + dimension. +* `prebatch_capacity`: Capacity for the large queue that is used to convert + batched tensors to single examples. +* `prebatch_threads`: Number of threads for the large queue that is used to + convert batched tensors to single examples. +* `runtime_checks`: Bool. If true, insert runtime checks on the output of + `accept_prob_fn`. Using `True` might have a performance impact. +* `name`: Optional prefix for ops created by this function. + +##### Raises: + + +* `ValueError`: enqueue_many is True and labels doesn't have a batch + dimension, or if enqueue_many is False and labels isn't a scalar. +* `ValueError`: enqueue_many is True, and batch dimension on data and labels + don't match. +* `ValueError`: if a zero initial probability class has a nonzero target + probability. + +##### Returns: + + A list of tensors of the same length as `tensors`, with batch dimension + `batch_size`. + +##### Example: + + # Get tensor for a single data and label example. + data, label = data_provider.Get(['data', 'label']) + + # Get stratified batch according to data tensor. + accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2 + data_batch = tf.contrib.training.rejection_sample( + [data, label], accept_prob_fn, 16) + + # Run batch through network. + ... 
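+
+  A more complete, self-contained sketch (the random input tensors and the
+  acceptance rule below are hypothetical, chosen only for illustration) that
+  also shows the queue runners needed to actually pull batches:
+
+    import tensorflow as tf
+
+    # One un-batched example: a scalar feature and an integer class label.
+    data = tf.random_normal([])
+    label = tf.random_uniform([], maxval=2, dtype=tf.int32)
+
+    # Accept label 1 with probability 0.9 and label 0 with probability 0.1,
+    # so class 1 dominates the output batches.
+    accept_prob_fn = lambda tensors: tf.to_float(tensors[1]) * 0.8 + 0.1
+
+    data_batch, label_batch = tf.contrib.training.rejection_sample(
+        [data, label], accept_prob_fn, batch_size=16, runtime_checks=True)
+
+    with tf.Session() as sess:
+      coord = tf.train.Coordinator()
+      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+      print(sess.run(label_batch))  # Mostly ones.
+      coord.request_stop()
+      coord.join(threads)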
+ diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md index 02dd663694b..97a2f4d2b86 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md @@ -274,7 +274,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.logits` {#BernoulliWithSigmoidP.logits} - +Log-odds of success. - - - @@ -306,7 +306,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.p` {#BernoulliWithSigmoidP.p} - +Probability of success. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Softplus.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Softplus.md new file mode 100644 index 00000000000..16313d2e851 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Softplus.md @@ -0,0 +1,188 @@ +Bijector which computes `Y = g(X) = Log[1 + exp(X)]`. + +The softplus `Bijector` has the following two useful properties: + +* The domain is the positive real numbers +* `softplus(x) approx x`, for large `x`, so it does not overflow as easily as + the `Exp` `Bijector`. + + Example Use: + + ```python + # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 + # batch ndim and 2 event ndims (i.e., vector of matrices). + softplus = Softplus(batch_ndims=1, event_ndims=2) + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + log(1 + exp(x)) == softplus.forward(x) + log(exp(x) - 1) == softplus.inverse(x) + ``` + + Note: log(.) and exp(.) are applied element-wise but the Jacobian is a + reduction over the event space. +- - - + +#### `tf.contrib.distributions.bijector.Softplus.__init__(event_ndims=0, validate_args=False, name='Softplus')` {#Softplus.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.dtype` {#Softplus.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.forward(x, name='forward')` {#Softplus.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse(x, name='inverse')` {#Softplus.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Softplus.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Softplus.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.is_constant_jacobian` {#Softplus.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.name` {#Softplus.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.parameters` {#Softplus.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.shaper` {#Softplus.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.validate_args` {#Softplus.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md index 1d9a3a4ad84..87ebea3329a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md @@ -1,30 +1,31 @@ -### `tf.contrib.losses.mean_squared_error(*args, **kwargs)` {#mean_squared_error} +### `tf.contrib.losses.mean_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_squared_error} -Adds a Sum-of-Squares loss to the training procedure. (deprecated) +Adds a Sum-of-Squares loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. +`weight` acts as a coefficient for the loss. 
If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. If the shape of +`weight` matches the shape of `predictions`, then the loss of each +measurable element of `predictions` is scaled by the corresponding value of +`weight`. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. +##### Args: - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs. +* `targets`: The ground truth output tensor, same dimensions as 'predictions'. +* `weight`: Coefficients for the loss a scalar, a tensor of shape + [batch_size] or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. + +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md index 68ba0ee73bf..24e2d3d8b5a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md @@ -1,6 +1,10 @@ ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k} -Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) +Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated) + +THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08. +Instructions for updating: +Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1]. SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. Instructions for updating: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md new file mode 100644 index 00000000000..b714ac42381 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md @@ -0,0 +1,182 @@ +Bijector which computes Y = g(X) = exp(X). 
+ +Example Use: + +```python +# Create the Y=g(X)=exp(X) transform which works only on Tensors with 1 +# batch ndim and 2 event ndims (i.e., vector of matrices). +exp = Exp(batch_ndims=1, event_ndims=2) +x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] +exp(x) == exp.forward(x) +log(x) == exp.inverse(x) +``` + +Note: the exp(.) is applied element-wise but the Jacobian is a reduction +over the event space. +- - - + +#### `tf.contrib.distributions.bijector.Exp.__init__(event_ndims=0, validate_args=False, name='Exp')` {#Exp.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.dtype` {#Exp.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.forward(x, name='forward')` {#Exp.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse(x, name='inverse')` {#Exp.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Exp.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Exp.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.is_constant_jacobian` {#Exp.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Exp.name` {#Exp.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.parameters` {#Exp.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.shaper` {#Exp.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.validate_args` {#Exp.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.bijector.Identity.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.bijector.Identity.md new file mode 100644 index 00000000000..8f7f3c4f2f3 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.bijector.Identity.md @@ -0,0 +1,176 @@ +Bijector which computes Y = g(X) = X. + +Example Use: + +```python +# Create the Y=g(X)=X transform which is intended for Tensors with 1 batch +# ndim and 1 event ndim (i.e., vector of vectors). +identity = Identity(batch_ndims=1, event_ndims=1) +x = [[1., 2], + [3, 4]] +x == identity.forward(x) == identity.inverse(x) +``` +- - - + +#### `tf.contrib.distributions.bijector.Identity.__init__(validate_args=False, name='Identity')` {#Identity.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.dtype` {#Identity.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.forward(x, name='forward')` {#Identity.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse(x, name='inverse')` {#Identity.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Identity.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Identity.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.is_constant_jacobian` {#Identity.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.name` {#Identity.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.parameters` {#Identity.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.shaper` {#Identity.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.validate_args` {#Identity.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.losses.sum_of_squares.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.losses.sum_of_squares.md deleted file mode 100644 index 05f13bf638c..00000000000 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.losses.sum_of_squares.md +++ /dev/null @@ -1,30 +0,0 @@ -### `tf.contrib.losses.sum_of_squares(*args, **kwargs)` {#sum_of_squares} - -Adds a Sum-of-Squares loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. - - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. 
- diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md index 020c9c060df..979cc1840c6 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md @@ -1,4 +1,4 @@ -### `tf.summary.scalar(display_name, tensor, description='', labels=None, collections=None, name=None)` {#scalar} +### `tf.summary.scalar(name, tensor, summary_description=None, collections=None)` {#scalar} Outputs a `Summary` protocol buffer containing a single scalar value. @@ -7,14 +7,12 @@ The generated Summary has a Tensor.proto containing the input Tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. Will also serve as the series name in + TensorBoard. * `tensor`: A tensor containing a single floating point or integer value. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to attach metadata. +* `summary_description`: Optional summary_description_pb2.SummaryDescription * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md new file mode 100644 index 00000000000..439988379b5 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md @@ -0,0 +1,189 @@ +Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian. + +Example Use: + +```python +exp = Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="Exp") +``` + +The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. +- - - + +#### `tf.contrib.distributions.bijector.Inline.__init__(forward_fn, inverse_fn, inverse_log_det_jacobian_fn, is_constant_jacobian=False, name='Inline')` {#Inline.__init__} + +Creates a `Bijector` from callables. + +##### Args: + + +* `forward_fn`: Python callable implementing the forward transformation. +* `inverse_fn`: Python callable implementing the inverse transformation. +* `inverse_log_det_jacobian_fn`: Python callable implementing the + inverse_log_det_jacobian transformation. +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is constant + for all input arguments. +* `name`: `String`, name given to ops managed by this object. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.dtype` {#Inline.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.forward(x, name='forward')` {#Inline.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. 
+* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse(x, name='inverse')` {#Inline.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Inline.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Inline.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.is_constant_jacobian` {#Inline.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.name` {#Inline.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.parameters` {#Inline.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.shaper` {#Inline.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.validate_args` {#Inline.validate_args} + +Returns True if Tensor arguments will be validated. 
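+
+As a hypothetical usage sketch of the `Inline`-built `Exp` from the example at
+the top of this page (the `tf.contrib.distributions.bijector` module path is
+assumed), the generic `Bijector` methods can be exercised directly:
+
+```python
+import tensorflow as tf
+
+bijector = tf.contrib.distributions.bijector
+exp = bijector.Inline(
+    forward_fn=tf.exp,
+    inverse_fn=tf.log,
+    inverse_log_det_jacobian_fn=(
+        lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
+    name="Exp")
+
+y = tf.constant([[1., 2.], [3., 4.]])
+x = exp.inverse(y)                      # log(y), shape [2, 2].
+ildj = exp.inverse_log_det_jacobian(y)  # -sum(log(y), axis=-1), shape [2].
+fwd = exp.forward(x)                    # Recovers y.
+
+with tf.Session() as sess:
+  print(sess.run([x, ildj, fwd]))
+```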
+ + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md index 396806e8963..bf0893bc5f3 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md @@ -33,7 +33,7 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. * `k`: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md index 2c879bdb0da..028c38057b7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md @@ -1,14 +1,16 @@ Saves summaries every N steps. - - - -#### `tf.train.SummarySaverHook.__init__(save_steps=100, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} +#### `tf.train.SummarySaverHook.__init__(save_steps=100, save_secs=None, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} Initializes a `SummarySaver` monitor. ##### Args: -* `save_steps`: `int`, save summaries every N steps. See `EveryN`. +* `save_steps`: `int`, save summaries every N steps. Exactly one of + `save_secs` and `save_steps` should be set. +* `save_secs`: `int`, save summaries every N seconds. * `output_dir`: `string`, the directory to save the summaries to. Only used if no `summary_writer` is supplied. * `summary_writer`: `SummaryWriter`. If `None` and an `output_dir` was passed, diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md index d2785ed69e8..3b7668eb67a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md @@ -1,44 +1,45 @@ -### `tf.contrib.losses.mean_pairwise_squared_error(*args, **kwargs)` {#mean_pairwise_squared_error} +### `tf.contrib.losses.mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_pairwise_squared_error} -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) +Adds a pairwise-errors-squared loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. 
+Unlike `mean_squared_error`, which is a measure of the differences between +corresponding elements of `predictions` and `targets`, +`mean_pairwise_squared_error` is a measure of the differences between pairs of +corresponding elements of `predictions` and `targets`. - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. +For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are +three pairs of differences are summed to compute the loss: + loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 +Note that since the inputs are of size [batch_size, d0, ... dN], the +corresponding pairs are computed within each batch sample but not across +samples within a batch. For example, if `predictions` represents a batch of +16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs +is drawn from each image, but not across images. - Note that since the inputs are of size [batch_size, d0, ... dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. +`weight` acts as a coefficient for the loss. If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. +##### Args: - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs, a tensor of size [batch_size, d0, .. dN] + where N+1 is the total number of dimensions in `predictions`. +* `targets`: The ground truth output tensor, whose shape must match the shape of + the `predictions` tensor. +* `weight`: Coefficients for the loss a scalar, a tensor of shape [batch_size] + or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. 
+ +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.eye.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.eye.md new file mode 100644 index 00000000000..b71edf9b969 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.eye.md @@ -0,0 +1,36 @@ +### `tf.eye(num_rows, num_columns=None, batch_shape=None, dtype=tf.float32, name=None)` {#eye} + +Construct an identity matrix, or a batch of matrices. + +```python +# Construct one identity matrix. +tf.eye(2) +==> [[1., 0.], + [0., 1.]] + +# Construct a batch of 3 identity matricies, each 2 x 2. +# batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. +batch_identity = tf.eye(2, batch_shape=[3]) + +# Construct one 2 x 3 "identity" matrix +tf.eye(2, num_columns=3) +==> [[ 1., 0., 0.], + [ 0., 1., 0.]] +``` + +##### Args: + + +* `num_rows`: Non-negative `int32` scalar `Tensor` giving the number of rows + in each batch matrix. +* `num_columns`: Optional non-negative `int32` scalar `Tensor` giving the number + of columns in each batch matrix. Defaults to `num_rows`. +* `batch_shape`: `int32` `Tensor`. If provided, returned `Tensor` will have + leading batch dimensions of this shape. +* `dtype`: The type of an element in the resulting `Tensor` +* `name`: A name for this `Op`. Defaults to "eye". + +##### Returns: + + A `Tensor` of shape `batch_shape + [num_rows, num_columns]` + diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md index a6b099dff95..86655fbc451 100644 --- a/tensorflow/g3doc/api_docs/python/index.md +++ b/tensorflow/g3doc/api_docs/python/index.md @@ -198,6 +198,7 @@ * [`erf`](../../api_docs/python/math_ops.md#erf) * [`erfc`](../../api_docs/python/math_ops.md#erfc) * [`exp`](../../api_docs/python/math_ops.md#exp) + * [`eye`](../../api_docs/python/math_ops.md#eye) * [`fft`](../../api_docs/python/math_ops.md#fft) * [`fft2d`](../../api_docs/python/math_ops.md#fft2d) * [`fft3d`](../../api_docs/python/math_ops.md#fft3d) @@ -741,6 +742,14 @@ * [`WishartCholesky`](../../api_docs/python/contrib.distributions.md#WishartCholesky) * [`WishartFull`](../../api_docs/python/contrib.distributions.md#WishartFull) +* **[Random variable transformations (contrib)](../../api_docs/python/contrib.distributions.bijector.md)**: + * [`Bijector`](../../api_docs/python/contrib.distributions.bijector.md#Bijector) + * [`Exp`](../../api_docs/python/contrib.distributions.bijector.md#Exp) + * [`Identity`](../../api_docs/python/contrib.distributions.bijector.md#Identity) + * [`Inline`](../../api_docs/python/contrib.distributions.bijector.md#Inline) + * [`ScaleAndShift`](../../api_docs/python/contrib.distributions.bijector.md#ScaleAndShift) + * [`Softplus`](../../api_docs/python/contrib.distributions.bijector.md#Softplus) + * **[FFmpeg (contrib)](../../api_docs/python/contrib.ffmpeg.md)**: * [`decode_audio`](../../api_docs/python/contrib.ffmpeg.md#decode_audio) * [`encode_audio`](../../api_docs/python/contrib.ffmpeg.md#encode_audio) @@ -946,8 +955,6 @@ * [`sigmoid_cross_entropy`](../../api_docs/python/contrib.losses.md#sigmoid_cross_entropy) * [`softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#softmax_cross_entropy) * [`sparse_softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#sparse_softmax_cross_entropy) - * 
[`sum_of_pairwise_squares`](../../api_docs/python/contrib.losses.md#sum_of_pairwise_squares) - * [`sum_of_squares`](../../api_docs/python/contrib.losses.md#sum_of_squares) * **[RNN (contrib)](../../api_docs/python/contrib.rnn.md)**: * [`AttentionCellWrapper`](../../api_docs/python/contrib.rnn.md#AttentionCellWrapper) @@ -1002,6 +1009,7 @@ * [`bucket`](../../api_docs/python/contrib.training.md#bucket) * [`bucket_by_sequence_length`](../../api_docs/python/contrib.training.md#bucket_by_sequence_length) * [`NextQueuedSequenceBatch`](../../api_docs/python/contrib.training.md#NextQueuedSequenceBatch) + * [`rejection_sample`](../../api_docs/python/contrib.training.md#rejection_sample) * [`resample_at_rate`](../../api_docs/python/contrib.training.md#resample_at_rate) * [`SequenceQueueingStateSaver`](../../api_docs/python/contrib.training.md#SequenceQueueingStateSaver) * [`stratified_sample`](../../api_docs/python/contrib.training.md#stratified_sample) diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md index ad1126474de..69d8bac2b63 100644 --- a/tensorflow/g3doc/api_docs/python/math_ops.md +++ b/tensorflow/g3doc/api_docs/python/math_ops.md @@ -1131,6 +1131,45 @@ tf.transpose(x, perm=[0, 2, 1]) ==> [[[1 4] +- - - + +### `tf.eye(num_rows, num_columns=None, batch_shape=None, dtype=tf.float32, name=None)` {#eye} + +Construct an identity matrix, or a batch of matrices. + +```python +# Construct one identity matrix. +tf.eye(2) +==> [[1., 0.], + [0., 1.]] + +# Construct a batch of 3 identity matricies, each 2 x 2. +# batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. +batch_identity = tf.eye(2, batch_shape=[3]) + +# Construct one 2 x 3 "identity" matrix +tf.eye(2, num_columns=3) +==> [[ 1., 0., 0.], + [ 0., 1., 0.]] +``` + +##### Args: + + +* `num_rows`: Non-negative `int32` scalar `Tensor` giving the number of rows + in each batch matrix. +* `num_columns`: Optional non-negative `int32` scalar `Tensor` giving the number + of columns in each batch matrix. Defaults to `num_rows`. +* `batch_shape`: `int32` `Tensor`. If provided, returned `Tensor` will have + leading batch dimensions of this shape. +* `dtype`: The type of an element in the resulting `Tensor` +* `name`: A name for this `Op`. Defaults to "eye". + +##### Returns: + + A `Tensor` of shape `batch_shape + [num_rows, num_columns]` + + - - - ### `tf.matrix_diag(diagonal, name=None)` {#matrix_diag} diff --git a/tensorflow/g3doc/api_docs/python/state_ops.md b/tensorflow/g3doc/api_docs/python/state_ops.md index 3a2f45ad7fa..5c1d0ebb3a6 100644 --- a/tensorflow/g3doc/api_docs/python/state_ops.md +++ b/tensorflow/g3doc/api_docs/python/state_ops.md @@ -2433,7 +2433,7 @@ tensor shape, the initializer will raise a `ValueError`. * `ValueError`: Too many elements provided. Needed at most 6, but received 8 - ``` +``` - - - diff --git a/tensorflow/g3doc/api_docs/python/summary.md b/tensorflow/g3doc/api_docs/python/summary.md index 14dac8117fe..4fbc65c0cf1 100644 --- a/tensorflow/g3doc/api_docs/python/summary.md +++ b/tensorflow/g3doc/api_docs/python/summary.md @@ -8,7 +8,7 @@ This module contains ops for generating summaries. ## Summary Ops - - - -### `tf.summary.tensor_summary(display_name, tensor, description='', labels=None, collections=None, name=None)` {#tensor_summary} +### `tf.summary.tensor_summary(name, tensor, summary_description=None, collections=None)` {#tensor_summary} Outputs a `Summary` protocol buffer with a serialized tensor.proto. 
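A minimal sketch of the updated signature shown above (the tensor and names are illustrative; `tf.summary.scalar`, documented further below, follows the same pattern):

```python
import tensorflow as tf

t = tf.constant([[1.0, 2.0], [3.0, 4.0]])

# The first positional argument is now the node name, which also serves as the
# series name in TensorBoard; display_name, description, and labels are gone.
tensor_summ = tf.summary.tensor_summary("my_tensor", t)
scalar_summ = tf.summary.scalar("my_tensor_mean", tf.reduce_mean(t))
```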
@@ -19,19 +19,12 @@ has one summary value containing the input tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. Will also serve as the series name in + TensorBoard. * `tensor`: A tensor of any type and shape to serialize. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to specify how the data can be interpreted, - for example: - * `'encoding:image/jpg'` for a string tensor containing jpg images - * `'encoding:proto/X/Y/foo.proto'` for a string tensor containing Foos - * `'group:$groupName/$roleInGroup'` for a tensor that is related to - other tensors that are all in a group. (e.g. bounding boxes and images) +* `summary_description`: Optional summary_pb2.SummaryDescription() * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: @@ -41,7 +34,7 @@ has one summary value containing the input tensor. - - - -### `tf.summary.scalar(display_name, tensor, description='', labels=None, collections=None, name=None)` {#scalar} +### `tf.summary.scalar(name, tensor, summary_description=None, collections=None)` {#scalar} Outputs a `Summary` protocol buffer containing a single scalar value. @@ -50,14 +43,12 @@ The generated Summary has a Tensor.proto containing the input Tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. Will also serve as the series name in + TensorBoard. * `tensor`: A tensor containing a single floating point or integer value. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to attach metadata. +* `summary_description`: Optional summary_description_pb2.SummaryDescription * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md index c83a9b4749d..08b1241667f 100644 --- a/tensorflow/g3doc/api_docs/python/train.md +++ b/tensorflow/g3doc/api_docs/python/train.md @@ -4401,14 +4401,16 @@ such as saving a last checkpoint. Saves summaries every N steps. - - - -#### `tf.train.SummarySaverHook.__init__(save_steps=100, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} +#### `tf.train.SummarySaverHook.__init__(save_steps=100, save_secs=None, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} Initializes a `SummarySaver` monitor. ##### Args: -* `save_steps`: `int`, save summaries every N steps. See `EveryN`. +* `save_steps`: `int`, save summaries every N steps. Exactly one of + `save_secs` and `save_steps` should be set. +* `save_secs`: `int`, save summaries every N seconds. * `output_dir`: `string`, the directory to save the summaries to. Only used if no `summary_writer` is supplied. * `summary_writer`: `SummaryWriter`. 
If `None` and an `output_dir` was passed, diff --git a/tensorflow/g3doc/tutorials/estimators/index.md b/tensorflow/g3doc/tutorials/estimators/index.md index bbc94637567..75909639aba 100644 --- a/tensorflow/g3doc/tutorials/estimators/index.md +++ b/tensorflow/g3doc/tutorials/estimators/index.md @@ -22,7 +22,7 @@ different activation functions for each neural network layer. Or maybe you're implementing a ranking or recommendation system, and neither a classifier nor a regressor is appropriate for generating predictions. -This tutorial covers how to create your own Estimator using the building blocks +This tutorial covers how to create your own `Estimator` using the building blocks provided in `tf.contrib.learn`, which will predict the ages of [abalones](https://en.wikipedia.org/wiki/Abalone) based on their physical measurements. You'll learn how to do the following: @@ -237,9 +237,8 @@ nn = tf.contrib.learn.Estimator( that will be passed into the `model_fn`. NOTE: Just like `tf.contrib.learn`'s predefined regressors and classifiers, the -`Estimator` initializer also accepts the following general configuration -arguments, all of which are optional: `model_dir`, `config`, and -`weight_column_name`. +`Estimator` initializer also accepts the general configuration +arguments `model_dir` and `config`. For the abalone age predictor, the model will accept one hyperparameter: learning rate. Define `LEARNING_RATE` as a constant at the beginning of your diff --git a/tensorflow/g3doc/tutorials/index.md b/tensorflow/g3doc/tutorials/index.md index c191dc88511..edc1f6b5a44 100644 --- a/tensorflow/g3doc/tutorials/index.md +++ b/tensorflow/g3doc/tutorials/index.md @@ -78,6 +78,14 @@ for predicting median house values. [View Tutorial](../tutorials/input_fn/index.md) +### Creating Estimators in tf.contrib.learn + +This tutorial covers how to create your own `Estimator` using the building blocks +provided in tf.contrib.learn. You'll build a model to predict the ages of abalones +based on their physical measurements. + +[View Tutorial](../tutorials/estimators/index.md) + ## TensorFlow Serving ### TensorFlow Serving diff --git a/tensorflow/g3doc/tutorials/leftnav_files b/tensorflow/g3doc/tutorials/leftnav_files index 6d9f6638db5..a75e62f5e36 100644 --- a/tensorflow/g3doc/tutorials/leftnav_files +++ b/tensorflow/g3doc/tutorials/leftnav_files @@ -9,6 +9,7 @@ wide/index.md wide_and_deep/index.md monitors/index.md input_fn/index.md +estimators/index.md ### TensorFlow Serving tfserve/index.md ### Image Processing diff --git a/tensorflow/g3doc/tutorials/tflearn/index.md b/tensorflow/g3doc/tutorials/tflearn/index.md index a7cebaaba82..b6e26ee351b 100644 --- a/tensorflow/g3doc/tutorials/tflearn/index.md +++ b/tensorflow/g3doc/tutorials/tflearn/index.md @@ -2,21 +2,23 @@ TensorFlow’s high-level machine learning API (tf.contrib.learn) makes it easy to configure, train, and evaluate a variety of machine learning models. In this -tutorial, you’ll use tf.contrib.learn to construct a -[neural network](https://en.wikipedia.org/wiki/Artificial_neural_network) -classifier and train it on the [Iris data set](https://en.wikipedia.org/wiki/Iris_flower_data_set) -to predict flower species based on sepal/petal geometry. 
You'll write code to -perform the following five steps: +tutorial, you’ll use tf.contrib.learn to construct a [neural +network](https://en.wikipedia.org/wiki/Artificial_neural_network) classifier and +train it on the [Iris data +set](https://en.wikipedia.org/wiki/Iris_flower_data_set) to predict flower +species based on sepal/petal geometry. You'll write code to perform the +following five steps: 1. Load CSVs containing Iris training/test data into a TensorFlow `Dataset` -2. Construct a [neural network classifier](../../api_docs/python/contrib.learn.md#DNNClassifier) +2. Construct a [neural network + classifier](../../api_docs/python/contrib.learn.md#DNNClassifier) 3. Fit the model using the training data 4. Evaluate the accuracy of the model 5. Classify new samples -NOTE: Remember to -[install TensorFlow on your machine](../../get_started/os_setup.md#download-and-setup) -before getting started with this tutorial. +NOTE: Remember to [install TensorFlow on your +machine](../../get_started/os_setup.md#download-and-setup) before getting +started with this tutorial. ## Complete Neural Network Source Code @@ -35,10 +37,14 @@ IRIS_TRAINING = "iris_training.csv" IRIS_TEST = "iris_test.csv" # Load datasets. -training_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TRAINING, - target_dtype=np.int) -test_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TEST, - target_dtype=np.int) +training_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TRAINING, + target_dtype=np.int, + features_dtype=np.float32) +test_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TEST, + target_dtype=np.int, + features_dtype=np.float32) # Specify that all features have real-value data feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)] @@ -62,7 +68,7 @@ print('Accuracy: {0:f}'.format(accuracy_score)) # Classify two new flower samples. new_samples = np.array( [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) -y = classifier.predict(new_samples) +y = list(classifier.predict(new_samples, as_iterable=True)) print('Predictions: {}'.format(str(y))) ``` @@ -87,8 +93,8 @@ and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862) Each row contains the following data for each flower sample: [sepal](https://en.wikipedia.org/wiki/Sepal) length, sepal width, [petal](https://en.wikipedia.org/wiki/Petal) length, petal width, and flower -species. Flower species are represented as integers, with 0 denoting -*Iris setosa*, 1 denoting *Iris versicolor*, and 2 denoting *Iris virginica*. +species. Flower species are represented as integers, with 0 denoting *Iris +setosa*, 1 denoting *Iris versicolor*, and 2 denoting *Iris virginica*. Sepal Length | Sepal Width | Petal Length | Petal Width | Species :----------- | :---------- | :----------- | :---------- | :------- @@ -107,8 +113,10 @@ Sepal Length | Sepal Width | Petal Length | Petal Width | Species For this tutorial, the Iris data has been randomized and split into two separate CSVs: -* A training set of 120 samples ([iris_training.csv](http://download.tensorflow.org/data/iris_training.csv)) -* A test set of 30 samples ([iris_test.csv](http://download.tensorflow.org/data/iris_test.csv)). +* A training set of 120 samples + ([iris_training.csv](http://download.tensorflow.org/data/iris_training.csv)) +* A test set of 30 samples + ([iris_test.csv](http://download.tensorflow.org/data/iris_test.csv)). Place these files in the same directory as your Python code. 
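If you would rather not download the two CSVs by hand, an optional helper along these lines (not part of the tutorial code; it only uses the URLs listed above and the standard library) can fetch them into the working directory first:

```python
import os

try:
  from urllib.request import urlretrieve  # Python 3
except ImportError:
  from urllib import urlretrieve  # Python 2

DATA_FILES = [
    ("iris_training.csv", "http://download.tensorflow.org/data/iris_training.csv"),
    ("iris_test.csv", "http://download.tensorflow.org/data/iris_test.csv"),
]

for filename, url in DATA_FILES:
  if not os.path.exists(filename):
    urlretrieve(url, filename)  # save next to your Python code
```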
@@ -124,13 +132,17 @@ import numpy as np ``` Next, load the training and test sets into `Dataset`s using the -[`load_csv()`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/datasets/base.py) -method in `learn.datasets.base`. The `load_csv()` method takes two required -arguments: +[`load_csv_with_header()`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/datasets/base.py) +method in `learn.datasets.base`. The `load_csv_with_header()` method takes three +required arguments: * `filename`, which takes the filepath to the CSV file -* `target_dtype`, which takes the [`numpy` datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html) - of the dataset's target value. +* `target_dtype`, which takes the [`numpy` + datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html) of the + dataset's target value. +* `features_dtype`, which takes the [`numpy` + datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html) of the + dataset's feature values. Here, the target (the value you're training the model to predict) is flower species, which is an integer from 0–2, so the appropriate `numpy` datatype @@ -142,25 +154,28 @@ IRIS_TRAINING = "iris_training.csv" IRIS_TEST = "iris_test.csv" # Load datasets. -training_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TRAINING, - target_dtype=np.int) -test_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TEST, - target_dtype=np.int) +training_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TRAINING, + target_dtype=np.int, + features_dtype=np.float32) +test_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TEST, + target_dtype=np.int, + features_dtype=np.float32) ``` -`Dataset`s in tf.contrib.learn are -[named tuples](https://docs.python.org/2/library/collections.html#collections.namedtuple); +`Dataset`s in tf.contrib.learn are [named +tuples](https://docs.python.org/2/library/collections.html#collections.namedtuple); you can access feature data and target values via the `data` and `target` fields. Here, `training_set.data` and `training_set.target` contain the feature data and target values for the training set, respectively, and `test_set.data` and `test_set.target` contain feature data and target values for the test set. -Later on, in -["Fit the DNNClassifier to the Iris Training Data,"](#fit-dnnclassifier) -you'll use `training_set.data` and `training_set.target` to train your model, -and in ["Evaluate Model Accuracy,"](#evaluate-accuracy) you'll use -`test_set.data` and `test_set.target`. But first, you'll construct your model in -the next section. +Later on, in ["Fit the DNNClassifier to the Iris Training +Data,"](#fit-dnnclassifier) you'll use `training_set.data` and +`training_set.target` to train your model, and in ["Evaluate Model +Accuracy,"](#evaluate-accuracy) you'll use `test_set.data` and +`test_set.target`. But first, you'll construct your model in the next section. ## Construct a Deep Neural Network Classifier @@ -169,8 +184,8 @@ tf.contrib.learn offers a variety of predefined models, called use "out of the box" to run training and evaluation operations on your data. Here, you'll configure a Deep Neural Network Classifier model to fit the Iris data. 
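Before moving on, an optional sanity check on the `Dataset` named tuples described above can confirm the data loaded as expected (this assumes the loading snippet above has already run, and the shapes reflect the 120/30-sample split described earlier):

```python
# training_set.data holds the feature matrix; training_set.target holds the labels.
print(training_set.data.shape)    # expected: (120, 4) -- 120 samples, 4 features
print(training_set.target.shape)  # expected: (120,) -- one integer label per sample
print(set(training_set.target))   # expected: {0, 1, 2} -- the three Iris species
```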
Using tf.contrib.learn, you can instantiate your -[`DNNClassifier`](../../api_docs/python/contrib.learn.md#DNNClassifier) -with just a couple lines of code: +[`DNNClassifier`](../../api_docs/python/contrib.learn.md#DNNClassifier) with +just a couple lines of code: ```python # Specify that all features have real-value data @@ -193,14 +208,14 @@ accordingly to `4` to hold all the data. Then, the code creates a `DNNClassifier` model using the following arguments: * `feature_columns=feature_columns`. The set of feature columns defined above. -* `hidden_units=[10, 20, 10]`. Three - [hidden layers](http://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw), +* `hidden_units=[10, 20, 10]`. Three [hidden + layers](http://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw), containing 10, 20, and 10 neurons, respectively. * `n_classes=3`. Three target classes, representing the three Iris species. * `model_dir=/tmp/iris_model`. The directory in which TensorFlow will save checkpoint data during model training. For more on logging and monitoring - with TensorFlow, see - [Logging and Monitoring Basics with tf.contrib.learn](../monitors/index.md). + with TensorFlow, see [Logging and Monitoring Basics with + tf.contrib.learn](../monitors/index.md). ## Fit the DNNClassifier to the Iris Training Data {#fit-dnnclassifier} @@ -271,7 +286,7 @@ You can predict their species with the following code: # Classify two new flower samples. new_samples = np.array( [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) -y = classifier.predict(new_samples) +y = list(classifier.predict(new_samples, as_iterable=True)) print('Predictions: {}'.format(str(y))) ``` @@ -293,7 +308,8 @@ second sample is *Iris virginica*. [Large-scale Linear Models with TensorFlow](../linear/overview.md). * To build your own Estimator using tf.contrib.learn APIs, check out [Building - Machine Learning Estimator in TensorFlow](http://terrytangyuan.github.io/2016/07/08/understand-and-build-tensorflow-estimator/). + Machine Learning Estimator in + TensorFlow](http://terrytangyuan.github.io/2016/07/08/understand-and-build-tensorflow-estimator/). * To experiment with neural network modeling and visualization in the browser, check out [Deep Playground](http://playground.tensorflow.org/). diff --git a/tensorflow/go/op/op.go b/tensorflow/go/op/op.go new file mode 100644 index 00000000000..dd79c2076ac --- /dev/null +++ b/tensorflow/go/op/op.go @@ -0,0 +1,51 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package op defines functions for adding TensorFlow operations to a Graph. +// +// Functions for adding an operation to a graph take a Scope object as the +// first argument. The Scope object encapsulates a graph and a set of +// properties (such as a name prefix) for all operations being added +// to the graph. 
+// +// WARNING: The API in this package has not been finalized and can +// change without notice. +package op + +import ( + tf "github.com/tensorflow/tensorflow/tensorflow/go" +) + +// Const adds an operation to graph that produces value as output. +func Const(scope *Scope, value interface{}) (tf.Output, error) { + if t, ok := value.(*tf.Tensor); ok { + return makeConst(scope, t) + } + t, err := tf.NewTensor(value) + if err != nil { + return tf.Output{}, err + } + return makeConst(scope, t) +} + +func makeConst(scope *Scope, t *tf.Tensor) (tf.Output, error) { + op, err := scope.Graph().AddOperation(tf.OpSpec{ + Name: scope.opName("Const"), + Type: "Const", + Attrs: map[string]interface{}{ + "dtype": t.DataType(), + "value": t, + }}) + return op.Output(0), err +} diff --git a/tensorflow/go/op/scope.go b/tensorflow/go/op/scope.go new file mode 100644 index 00000000000..25ebbae70f6 --- /dev/null +++ b/tensorflow/go/op/scope.go @@ -0,0 +1,77 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package op + +import ( + "fmt" + + tf "github.com/tensorflow/tensorflow/tensorflow/go" +) + +// Scope encapsulates common properties of operations being added to a Graph. +// +// Scopes allow common properties (such as a name prefix) to be specified +// once for multiple operations being added to a graph. The With* methods +// create derivative scopes that encapsulate the same set of properties +// as the parent Scope, except for the one being changed by the specific +// With* method. +// +// Scopes are NOT safe for concurrent use by multiple goroutines. +type Scope struct { + graph *tf.Graph + namemap map[string]int + namespace string +} + +// NewScope creates a Scope initialized with an empty Graph. +func NewScope() *Scope { + return &Scope{graph: tf.NewGraph(), namemap: make(map[string]int)} +} + +// Graph returns the Graph which this Scope and its children are +func (s *Scope) Graph() *tf.Graph { + return s.graph +} + +// SubScope returns a new Scope which will cause all operations added to the +// graph to be namespaced with 'namespace'. If namespace collides with an +// existing namespace within the scope, then a suffix will be added. +func (s *Scope) SubScope(namespace string) *Scope { + namespace = s.uniqueName(namespace) + if s.namespace != "" { + namespace = s.namespace + "/" + namespace + } + return &Scope{ + graph: s.graph, + namemap: make(map[string]int), + namespace: namespace, + } +} + +func (s *Scope) uniqueName(name string) string { + count := s.namemap[name] + s.namemap[name]++ + if count == 0 { + return name + } + return fmt.Sprint(name, "_", count) +} + +func (s *Scope) opName(typ string) string { + if s.namespace == "" { + return typ + } + return s.namespace + "/" + typ +} diff --git a/tensorflow/go/op/scope_test.go b/tensorflow/go/op/scope_test.go new file mode 100644 index 00000000000..3d1d3364195 --- /dev/null +++ b/tensorflow/go/op/scope_test.go @@ -0,0 +1,62 @@ +// Copyright 2016 The TensorFlow Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package op + +import ( + "fmt" + "testing" +) + +func TestScopeSubScope(t *testing.T) { + constant := func(s *Scope) string { + c, err := Const(s, int64(1)) + if err != nil { + t.Fatal(err) + } + return c.Op.Name() + } + var ( + root = NewScope() + sub1 = root.SubScope("x") + sub2 = root.SubScope("x") + sub1a = sub1.SubScope("y") + sub2a = sub2.SubScope("y") + ) + testdata := []struct { + got, want string + }{ + {constant(root), "Const"}, + {constant(sub1), "x/Const"}, + {constant(sub1a), "x/y/Const"}, + {constant(sub2), "x_1/Const"}, + {constant(sub2a), "x_1/y/Const"}, + } + for idx, test := range testdata { + if test.got != test.want { + t.Errorf("#%d: Got %q, want %q", idx, test.got, test.want) + } + } + +} + +func ExampleScope_SubScope() { + var ( + s = NewScope() + c1, _ = Const(s.SubScope("x"), int64(1)) + c2, _ = Const(s.SubScope("x"), int64(1)) + ) + fmt.Println(c1.Op.Name(), c2.Op.Name()) + // Output: x/Const x_1/Const +} diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 76a4615a7bf..b1c34b6cd5c 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -27,9 +27,32 @@ import ( ) // DataType holds the type for a scalar value. E.g., one slot in a tensor. -// The values here are identical to corresponding values in types.proto. type DataType C.TF_DataType +// Types of scalar values in the TensorFlow type system. +const ( + Float DataType = C.TF_FLOAT + Double DataType = C.TF_DOUBLE + Int32 DataType = C.TF_INT32 + Uint8 DataType = C.TF_UINT8 + Int16 DataType = C.TF_INT16 + Int8 DataType = C.TF_INT8 + String DataType = C.TF_STRING + Complex64 DataType = C.TF_COMPLEX64 + Complex DataType = C.TF_COMPLEX + Int64 DataType = C.TF_INT64 + Bool DataType = C.TF_BOOL + Qint8 DataType = C.TF_QINT8 + Quint8 DataType = C.TF_QUINT8 + Qint32 DataType = C.TF_QINT32 + Bfloat16 DataType = C.TF_BFLOAT16 + Qint16 DataType = C.TF_QINT16 + Quint16 DataType = C.TF_QUINT16 + Uint16 DataType = C.TF_UINT16 + Complex128 DataType = C.TF_COMPLEX128 + Half DataType = C.TF_HALF +) + // Tensor holds a multi-dimensional array of elements of a single data type. 
type Tensor struct { // We create TF_Tensor on demand rather than keep a handle to C.TF_Tensor diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 630d6137292..fd80658615e 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -60,6 +60,8 @@ func TestNewTensor(t *testing.T) { []uint32{5}, uint64(5), []uint64{5}, + // Mismatched dimensions + [][]float32{{1,2,3},{4}}, } for _, test := range tests { diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 9cf4edee061..59fcc45a811 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2055,6 +2055,9 @@ py_test( size = "small", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", + tags = [ + "notsan", # http://b/32109634 + ], deps = [ "//tensorflow:tensorflow_py", ], diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 9751907eede..64434b40b99 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -46,6 +46,12 @@ py_library( srcs_version = "PY2AND3", ) +py_library( + name = "command_parser", + srcs = ["cli/command_parser.py"], + srcs_version = "PY2AND3", +) + py_library( name = "tensor_format", srcs = ["cli/tensor_format.py"], @@ -58,6 +64,7 @@ py_library( srcs = ["cli/analyzer_cli.py"], srcs_version = "PY2AND3", deps = [ + ":command_parser", ":debug_data", ":debugger_cli_common", ":tensor_format", @@ -68,7 +75,10 @@ py_library( name = "curses_ui", srcs = ["cli/curses_ui.py"], srcs_version = "PY2AND3", - deps = [":debugger_cli_common"], + deps = [ + ":command_parser", + ":debugger_cli_common", + ], ) py_library( @@ -200,6 +210,19 @@ py_test( ], ) +py_test( + name = "command_parser_test", + size = "small", + srcs = [ + "cli/command_parser_test.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":command_parser", + "//tensorflow/python:framework_test_lib", + ], +) + py_test( name = "tensor_format_test", size = "small", diff --git a/tensorflow/python/debug/cli/analyzer_cli.py b/tensorflow/python/debug/cli/analyzer_cli.py index ab24023b79a..872624707a8 100644 --- a/tensorflow/python/debug/cli/analyzer_cli.py +++ b/tensorflow/python/debug/cli/analyzer_cli.py @@ -30,6 +30,7 @@ import re from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.debug import debug_data +from tensorflow.python.debug.cli import command_parser from tensorflow.python.debug.cli import debugger_cli_common from tensorflow.python.debug.cli import tensor_format @@ -44,12 +45,10 @@ HANG_SUFFIX = "|- " DEPTH_TEMPLATE = "(%d) " OP_TYPE_TEMPLATE = "[%s] " -# String consntats for control inputs/outputs, etc. +# String constants for control inputs/outputs, etc. CTRL_LABEL = "(Ctrl) " ELLIPSIS = "..." -DEFAULT_NDARRAY_DISPLAY_THRESHOLD = 2000 - class DebugAnalyzer(object): """Analyzer for debug data from dump directories.""" @@ -69,6 +68,10 @@ class DebugAnalyzer(object): # Argument parsers for command handlers. self._arg_parsers = {} + # Default threshold number of elements above which ellipses will be used + # when printing the value of the tensor. + self.default_ndarray_display_threshold = 2000 + # Parser for list_tensors. 
ap = argparse.ArgumentParser( description="List dumped intermediate tensors.", @@ -187,11 +190,22 @@ class DebugAnalyzer(object): ap.add_argument( "tensor_name", type=str, - help="Name of the tensor, e.g., hidden1/Wx_plus_b/MatMul:0") + help="Name of the tensor, followed by any slicing indices, " + "e.g., hidden1/Wx_plus_b/MatMul:0, " + "hidden1/Wx_plus_b/MatMul:0[1, :]") + ap.add_argument( + "-n", + "--number", + dest="number", + type=int, + default=-1, + help="0-based dump number for the specified tensor. " + "Required for tensor with multiple dumps.") + ap.add_argument( "-a", "--all", - dest="all", + dest="print_all", action="store_true", help="Print the tensor in its entirety, i.e., do not use ellipses.") self._arg_parsers["print_tensor"] = ap @@ -455,15 +469,22 @@ class DebugAnalyzer(object): Output text lines as a RichTextLines object. """ + parsed = self._arg_parsers["print_tensor"].parse_args(args) + if screen_info and "cols" in screen_info: np_printoptions = {"linewidth": screen_info["cols"]} else: np_printoptions = {} - parsed = self._arg_parsers["print_tensor"].parse_args(args) + # Determine if there parsed.tensor_name contains any indexing (slicing). + if parsed.tensor_name.count("[") == 1 and parsed.tensor_name.endswith("]"): + tensor_name = parsed.tensor_name[:parsed.tensor_name.index("[")] + tensor_slicing = parsed.tensor_name[parsed.tensor_name.index("["):] + else: + tensor_name = parsed.tensor_name + tensor_slicing = "" - node_name, output_slot = debug_data.parse_node_or_tensor_name( - parsed.tensor_name) + node_name, output_slot = debug_data.parse_node_or_tensor_name(tensor_name) if output_slot is None: return self._error("\"%s\" is not a valid tensor name" % parsed.tensor_name) @@ -484,25 +505,102 @@ class DebugAnalyzer(object): matching_data.append(datum) if not matching_data: + # No dump for this tensor. return self._error( "Tensor \"%s\" did not generate any dumps." % parsed.tensor_name) - - # TODO(cais): In the case of multiple dumps from the same tensor, require - # explicit specification of the DebugOp and the temporal order. - if len(matching_data) > 1: - return self._error( - "print_tensor logic for multiple dumped records has not been " - "implemented.") - - tensor = matching_data[0].get_tensor() - if parsed.all: - np_printoptions["threshold"] = tensor.size + elif len(matching_data) == 1: + # There is only one dump for this tensor. + if parsed.number <= 0: + return self._format_tensor( + matching_data[0].get_tensor(), + matching_data[0].watch_key, + np_printoptions, + print_all=parsed.print_all, + tensor_slicing=tensor_slicing) + else: + return self._error( + "Invalid number (%d) for tensor %s, which generated one dump." % + (parsed.number, parsed.tensor_name)) else: - np_printoptions["threshold"] = DEFAULT_NDARRAY_DISPLAY_THRESHOLD + # There are more than one dumps for this tensor. 
+ if parsed.number < 0: + lines = [ + "Tensor \"%s\" generated %d dumps:" % (parsed.tensor_name, + len(matching_data)) + ] + + for i, datum in enumerate(matching_data): + rel_time = (datum.timestamp - self._debug_dump.t0) / 1000.0 + lines.append("#%d [%.3f ms] %s" % (i, rel_time, datum.watch_key)) + + lines.append("") + lines.append( + "Use the -n (--number) flag to specify which dump to print.") + lines.append("For example:") + lines.append(" print_tensor %s -n 0" % parsed.tensor_name) + + return debugger_cli_common.RichTextLines(lines) + elif parsed.number >= len(matching_data): + return self._error( + "Specified number (%d) exceeds the number of available dumps " + "(%d) for tensor %s" % + (parsed.number, len(matching_data), parsed.tensor_name)) + else: + return self._format_tensor( + matching_data[parsed.number].get_tensor(), + matching_data[parsed.number].watch_key + " (dump #%d)" % + parsed.number, + np_printoptions, + print_all=parsed.print_all, + tensor_slicing=tensor_slicing) + + def _format_tensor(self, + tensor, + watch_key, + np_printoptions, + print_all=False, + tensor_slicing=None): + """Generate formatted str to represent a tensor or its slices. + + Args: + tensor: (numpy ndarray) The tensor value. + watch_key: (str) Tensor debug watch key. + np_printoptions: (dict) Numpy tensor formatting options. + print_all: (bool) Whether the tensor is to be displayed in its entirety, + instead of printing ellipses, even if its number of elements exceeds + the default numpy display threshold. + (Note: Even if this is set to true, the screen output can still be cut + off by the UI frontend if it consist of more lines than the frontend + can handle.) + tensor_slicing: (str or None) Slicing of the tensor, e.g., "[:, 1]". If + None, no slicing will be performed on the tensor. + + Returns: + (str) Formatted str representing the (potentially sliced) tensor. + + Raises: + ValueError: If tehsor_slicing is not a valid numpy ndarray slicing str. + """ + + if tensor_slicing: + # Validate the indexing. 
+ if not command_parser.validate_slicing_string(tensor_slicing): + raise ValueError("Invalid tensor-slicing string.") + + value = eval("tensor" + tensor_slicing) # pylint: disable=eval-used + sliced_name = watch_key + tensor_slicing + else: + value = tensor + sliced_name = watch_key + + if print_all: + np_printoptions["threshold"] = value.size + else: + np_printoptions["threshold"] = self.default_ndarray_display_threshold return tensor_format.format_tensor( - tensor, - matching_data[0].watch_key, + value, + sliced_name, include_metadata=True, np_printoptions=np_printoptions) diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py index ee62dcc270d..f5552017666 100644 --- a/tensorflow/python/debug/cli/analyzer_cli_test.py +++ b/tensorflow/python/debug/cli/analyzer_cli_test.py @@ -505,6 +505,59 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): self.assertIn(4, out.annotations) self.assertIn(5, out.annotations) + def testPrintTensorWithSlicing(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0[1, :]"], + screen_info={"cols": 80}) + + self.assertEqual([ + "Tensor \"simple_mul_add/matmul:0:DebugIdentity[1, :]\":", + " dtype: float64", " shape: (1,)", "", "array([-2.])" + ], out.lines) + + self.assertIn("tensor_metadata", out.annotations) + self.assertIn(4, out.annotations) + + def testPrintTensorInvalidSlicingString(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0[1, foo()]"], + screen_info={"cols": 80}) + + self.assertEqual("Error occurred during handling of command: print_tensor " + "simple_mul_add/matmul:0[1, foo()]:", out.lines[0]) + self.assertEqual("ValueError: Invalid tensor-slicing string.", + out.lines[-2]) + + def testPrintTensorValidExplicitNumber(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0", "-n", "0"], + screen_info={"cols": 80}) + + self.assertEqual([ + "Tensor \"simple_mul_add/matmul:0:DebugIdentity\":", + " dtype: float64", + " shape: (2, 1)", + "", + "array([[ 7.],", + " [-2.]])", + ], out.lines) + + self.assertIn("tensor_metadata", out.annotations) + self.assertIn(4, out.annotations) + self.assertIn(5, out.annotations) + + def testPrintTensorInvalidExplicitNumber(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0", "-n", "1"], + screen_info={"cols": 80}) + + self.assertEqual([ + "ERROR: Invalid number (1) for tensor simple_mul_add/matmul:0, " + "which generated one dump." + ], out.lines) + + self.assertNotIn("tensor_metadata", out.annotations) + def testPrintTensorMissingOutputSlot(self): out = self._registry.dispatch_command( "print_tensor", ["simple_mul_add/matmul"]) @@ -568,6 +621,78 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): analyzer.get_tensor_filter("bar") +class AnalyzerCLIPrintLargeTensorTest(test_util.TensorFlowTestCase): + + @classmethod + def setUpClass(cls): + cls._dump_root = tempfile.mkdtemp() + + with session.Session() as sess: + # 2400 elements should exceed the default threshold (2000). + x = constant_op.constant(np.zeros([300, 8]), name="large_tensors/x") + + run_options = config_pb2.RunOptions(output_partition_graphs=True) + debug_utils.watch_graph( + run_options, + sess.graph, + debug_ops=["DebugIdentity"], + debug_urls="file://%s" % cls._dump_root) + + # Invoke Session.run(). 
+ run_metadata = config_pb2.RunMetadata() + sess.run(x, options=run_options, run_metadata=run_metadata) + + cls._debug_dump = debug_data.DebugDumpDir( + cls._dump_root, partition_graphs=run_metadata.partition_graphs) + + # Construct the analyzer. + cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump) + + # Construct the handler registry. + cls._registry = debugger_cli_common.CommandHandlerRegistry() + + # Register command handler. + cls._registry.register_command_handler( + "print_tensor", + cls._analyzer.print_tensor, + cls._analyzer.get_help("print_tensor"), + prefix_aliases=["pt"]) + + @classmethod + def tearDownClass(cls): + # Tear down temporary dump directory. + shutil.rmtree(cls._dump_root) + + def testPrintLargeTensorWithoutAllOption(self): + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0"], screen_info={"cols": 80}) + + print(out.lines) # DEBUG + + # Assert that ellipses are present in the tensor value printout. + self.assertIn("...,", out.lines[4]) + + # 2100 still exceeds 2000. + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0[:, 0:7]"], + screen_info={"cols": 80}) + + self.assertIn("...,", out.lines[4]) + + def testPrintLargeTensorWithAllOption(self): + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0", "-a"], + screen_info={"cols": 80}) + + # Assert that ellipses are not present in the tensor value printout. + self.assertNotIn("...,", out.lines[4]) + + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0[:, 0:7]", "--all"], + screen_info={"cols": 80}) + self.assertNotIn("...,", out.lines[4]) + + class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase): @classmethod @@ -811,5 +936,94 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase): " [Op]: Input node has op type Op."], out.lines) +class AnalyzerCLIWhileLoopTest(test_util.TensorFlowTestCase): + + @classmethod + def setUpClass(cls): + cls._dump_root = tempfile.mkdtemp() + + with session.Session() as sess: + loop_var = constant_op.constant(0, name="while_loop_test/loop_var") + cond = lambda loop_var: math_ops.less(loop_var, 10) + body = lambda loop_var: math_ops.add(loop_var, 1) + while_loop = control_flow_ops.while_loop( + cond, body, [loop_var], parallel_iterations=1) + + run_options = config_pb2.RunOptions(output_partition_graphs=True) + debug_url = "file://%s" % cls._dump_root + + watch_opts = run_options.debug_tensor_watch_opts + + # Add debug tensor watch for "while/Identity". + watch = watch_opts.add() + watch.node_name = "while/Identity" + watch.output_slot = 0 + watch.debug_ops.append("DebugIdentity") + watch.debug_urls.append(debug_url) + + # Invoke Session.run(). + run_metadata = config_pb2.RunMetadata() + sess.run(while_loop, options=run_options, run_metadata=run_metadata) + + cls._debug_dump = debug_data.DebugDumpDir( + cls._dump_root, partition_graphs=run_metadata.partition_graphs) + + cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump) + cls._registry = debugger_cli_common.CommandHandlerRegistry() + cls._registry.register_command_handler( + "list_tensors", + cls._analyzer.list_tensors, + cls._analyzer.get_help("list_tensors"), + prefix_aliases=["lt"]) + cls._registry.register_command_handler( + "print_tensor", + cls._analyzer.print_tensor, + cls._analyzer.get_help("print_tensor"), + prefix_aliases=["pt"]) + + @classmethod + def tearDownClass(cls): + # Tear down temporary dump directory. 
+ shutil.rmtree(cls._dump_root) + + def testMultipleDumpsPrintTensorNoNumber(self): + output = self._registry.dispatch_command("pt", ["while/Identity:0"]) + + self.assertEqual("Tensor \"while/Identity:0\" generated 10 dumps:", + output.lines[0]) + + for i in xrange(10): + self.assertTrue(output.lines[i + 1].startswith("#%d" % i)) + self.assertTrue(output.lines[i + 1].endswith( + " ms] while/Identity:0:DebugIdentity")) + + self.assertEqual( + "Use the -n (--number) flag to specify which dump to print.", + output.lines[-3]) + self.assertEqual("For example:", output.lines[-2]) + self.assertEqual(" print_tensor while/Identity:0 -n 0", output.lines[-1]) + + def testMultipleDumpsPrintTensorWithNumber(self): + for i in xrange(5): + output = self._registry.dispatch_command( + "pt", ["while/Identity:0", "-n", "%d" % i]) + + self.assertEqual("Tensor \"while/Identity:0:DebugIdentity (dump #%d)\":" % + i, output.lines[0]) + self.assertEqual(" dtype: int32", output.lines[1]) + self.assertEqual(" shape: ()", output.lines[2]) + self.assertEqual("", output.lines[3]) + self.assertEqual("array(%d, dtype=int32)" % i, output.lines[4]) + + def testMultipleDumpsPrintTensorInvalidNumber(self): + output = self._registry.dispatch_command("pt", + ["while/Identity:0", "-n", "10"]) + + self.assertEqual([ + "ERROR: Specified number (10) exceeds the number of available dumps " + "(10) for tensor while/Identity:0" + ], output.lines) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/debug/cli/command_parser.py b/tensorflow/python/debug/cli/command_parser.py new file mode 100644 index 00000000000..4a70468e278 --- /dev/null +++ b/tensorflow/python/debug/cli/command_parser.py @@ -0,0 +1,110 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Command parsing module for TensorFlow Debugger (tfdbg).""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re + +_BRACKETS_PATTERN = re.compile(r"\[[^\]]*\]") +_QUOTES_PATTERN = re.compile(r"\"[^\"]*\"") +_WHITESPACE_PATTERN = re.compile(r"\s+") + + +def parse_command(command): + """Parse command string into a list of arguments. + + - Disregards whitespace inside double quotes and brackets. + - Strips paired leading and trailing double quotes in arguments. + - Splits the command at whitespace. + + Nested double quotes and brackets are not handled. + + Args: + command: (str) Input command. + + Returns: + (list of str) List of arguments. 
+ """ + + command = command.strip() + if not command: + return [] + + brackets_intervals = [f.span() for f in _BRACKETS_PATTERN.finditer(command)] + quotes_intervals = [f.span() for f in _QUOTES_PATTERN.finditer(command)] + whitespaces_intervals = [ + f.span() for f in _WHITESPACE_PATTERN.finditer(command) + ] + + if not whitespaces_intervals: + return [command] + + arguments = [] + idx0 = 0 + for start, end in whitespaces_intervals + [(len(command), None)]: + # Skip whitespace stretches enclosed in brackets or double quotes. + + if not any(interval[0] < start < interval[1] + for interval in brackets_intervals + quotes_intervals): + argument = command[idx0:start] + + # Strip leading and trailing double quote if they are paired. + if argument.startswith("\"") and argument.endswith("\""): + argument = argument[1:-1] + arguments.append(argument) + idx0 = end + + return arguments + + +def parse_tensor_name_with_slicing(in_str): + """Parse tensor name, potentially suffixed by slicing string. + + Args: + in_str: (str) Input name of the tensor, potentially followed by a slicing + string. E.g.: Without slicing string: "hidden/weights/Variable:0", with + slicing string: "hidden/weights/Varaible:0[1, :]" + + Returns: + (str) name of the tensor + (str) sliciing string, if any. If no slicing string is present, return "". + """ + + if in_str.count("[") == 1 and in_str.endswith("]"): + tensor_name = in_str[:in_str.index("[")] + tensor_slicing = in_str[in_str.index("["):] + else: + tensor_name = in_str + tensor_slicing = "" + + return tensor_name, tensor_slicing + + +def validate_slicing_string(slicing_string): + """Validate a slicing string. + + Check if the input string contains only brackets, digits, commas and + colons that are valid characters in numpy-style array slicing. + + Args: + slicing_string: (str) Input slicing string to be validated. + + Returns: + (bool) True if and only if the slicing string is valid. + """ + + return bool(re.search(r"^\[(\d|,|\s|:)+\]$", slicing_string)) diff --git a/tensorflow/python/debug/cli/command_parser_test.py b/tensorflow/python/debug/cli/command_parser_test.py new file mode 100644 index 00000000000..b819f25e69b --- /dev/null +++ b/tensorflow/python/debug/cli/command_parser_test.py @@ -0,0 +1,133 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for TensorFlow Debugger command parser.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.debug.cli import command_parser +from tensorflow.python.framework import test_util +from tensorflow.python.platform import googletest + + +class ParseCommandTest(test_util.TensorFlowTestCase): + + def testParseNoBracketsOrQuotes(self): + command = "" + self.assertEqual([], command_parser.parse_command(command)) + + command = "a" + self.assertEqual(["a"], command_parser.parse_command(command)) + + command = "foo bar baz qux" + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + command = "foo bar\tbaz\t qux" + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + def testParseLeadingTrailingWhitespaces(self): + command = " foo bar baz qux " + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + command = "\nfoo bar baz qux\n" + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + def testParseCommandsWithBrackets(self): + command = "pt foo[1, 2, :]" + self.assertEqual(["pt", "foo[1, 2, :]"], + command_parser.parse_command(command)) + command = "pt foo[1, 2, :] -a" + self.assertEqual(["pt", "foo[1, 2, :]", "-a"], + command_parser.parse_command(command)) + + command = "inject_value foo [1, 2,:] 0" + self.assertEqual(["inject_value", "foo", "[1, 2,:]", "0"], + command_parser.parse_command(command)) + + def testParseCommandWithTwoArgsContainingBrackets(self): + command = "pt foo[1, :] bar[:, 2]" + self.assertEqual(["pt", "foo[1, :]", "bar[:, 2]"], + command_parser.parse_command(command)) + + command = "pt foo[] bar[:, 2]" + self.assertEqual(["pt", "foo[]", "bar[:, 2]"], + command_parser.parse_command(command)) + + def testParseCommandWithUnmatchedBracket(self): + command = "pt foo[1, 2, :" + self.assertNotEqual(["pt", "foo[1, 2, :]"], + command_parser.parse_command(command)) + + def testParseCommandsWithQuotes(self): + command = "inject_value foo \"np.zeros([100, 500])\"" + self.assertEqual(["inject_value", "foo", "np.zeros([100, 500])"], + command_parser.parse_command(command)) + # The pair of double quotes should have been stripped. 
+ + command = "\"command prefix with spaces\" arg1" + self.assertEqual(["command prefix with spaces", "arg1"], + command_parser.parse_command(command)) + + def testParseCommandWithTwoArgsContainingQuotes(self): + command = "foo \"bar\" \"qux\"" + self.assertEqual(["foo", "bar", "qux"], + command_parser.parse_command(command)) + + command = "foo \"\" \"qux\"" + self.assertEqual(["foo", "", "qux"], + command_parser.parse_command(command)) + + +class ParseTensorNameTest(test_util.TensorFlowTestCase): + + def testParseTensorNameWithoutSlicing(self): + (tensor_name, + tensor_slicing) = command_parser.parse_tensor_name_with_slicing( + "hidden/weights/Variable:0") + + self.assertEqual("hidden/weights/Variable:0", tensor_name) + self.assertEqual("", tensor_slicing) + + def testParseTensorNameWithSlicing(self): + (tensor_name, + tensor_slicing) = command_parser.parse_tensor_name_with_slicing( + "hidden/weights/Variable:0[:, 1]") + + self.assertEqual("hidden/weights/Variable:0", tensor_name) + self.assertEqual("[:, 1]", tensor_slicing) + + +class ValidateSlicingStringTest(test_util.TensorFlowTestCase): + + def testValidateValidSlicingStrings(self): + self.assertTrue(command_parser.validate_slicing_string("[1]")) + self.assertTrue(command_parser.validate_slicing_string("[2,3]")) + self.assertTrue(command_parser.validate_slicing_string("[4, 5, 6]")) + self.assertTrue(command_parser.validate_slicing_string("[7,:, :]")) + + def testValidateInvalidSlicingStrings(self): + self.assertFalse(command_parser.validate_slicing_string("")) + self.assertFalse(command_parser.validate_slicing_string("[1,")) + self.assertFalse(command_parser.validate_slicing_string("2,3]")) + self.assertFalse(command_parser.validate_slicing_string("[4, foo()]")) + self.assertFalse(command_parser.validate_slicing_string("[5, bar]")) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/debug/cli/curses_ui.py b/tensorflow/python/debug/cli/curses_ui.py index 5aafe541c8d..bcdd675f9b1 100644 --- a/tensorflow/python/debug/cli/curses_ui.py +++ b/tensorflow/python/debug/cli/curses_ui.py @@ -25,6 +25,7 @@ import sys from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.debug.cli import command_parser from tensorflow.python.debug.cli import debugger_cli_common @@ -444,18 +445,11 @@ class CursesUI(object): args: (list of str) The command arguments (i.e., not including the prefix). """ - - # TODO(cais): Support parsing of arguments surrounded by pairs of quotes - # and with spaces in them. - command = command.strip() if not command: return "", [] - # Split and remove extra spaces. - command_items = command.split(" ") - command_items = [item for item in command_items if item] - + command_items = command_parser.parse_command(command) return command_items[0], command_items[1:] def _screen_gather_textbox_str(self): diff --git a/tensorflow/python/debug/examples/README.md b/tensorflow/python/debug/examples/README.md index 5a69749e788..26219600a8f 100644 --- a/tensorflow/python/debug/examples/README.md +++ b/tensorflow/python/debug/examples/README.md @@ -143,6 +143,7 @@ Try the following commands at the `tfdbg>` prompt: | Command example | Explanation | | ------------- |:--------------------- | | `pt hidden/Relu:0` | Print the value of the tensor `hidden/Relu:0`. | +| `pt hidden/Relu:0[:, 1]` | Print a subarray of the tensor `hidden/Relu:0`, using numpy-style array slicing. | | `ni -a hidden/Relu` | Displays information about the node `hidden/Relu`, including node attributes. 
| | `li -r hidden/Relu:0` | List the inputs to the node `hidden/Relu`, recursively, i.e., the input tree. | | `lo -r hidden/Relu:0` | List the recipients of the output of the node `hidden/Relu`, recursively, i.e., the output recipient tree. | diff --git a/tensorflow/python/framework/cpp_shape_inference.cc b/tensorflow/python/framework/cpp_shape_inference.cc index acf7aa8a1da..0d8703fe8fe 100644 --- a/tensorflow/python/framework/cpp_shape_inference.cc +++ b/tensorflow/python/framework/cpp_shape_inference.cc @@ -74,7 +74,6 @@ Status RunCppShapeInferenceImpl( // Run shape inference. tensorflow::shape_inference::InferenceContext c(&node, op_reg_data->op_def, - {} /* input_shape_strings */, input_shapes, input_tensors); TF_RETURN_IF_ERROR(c.construction_status()); diff --git a/tensorflow/python/framework/gen_docs_combined.py b/tensorflow/python/framework/gen_docs_combined.py index e512362a26d..f7d0351e71c 100644 --- a/tensorflow/python/framework/gen_docs_combined.py +++ b/tensorflow/python/framework/gen_docs_combined.py @@ -63,6 +63,7 @@ def module_names(): "tf.contrib.copy_graph", "tf.contrib.crf", "tf.contrib.distributions", + "tf.contrib.distributions.bijector", "tf.contrib.ffmpeg", "tf.contrib.framework", "tf.contrib.graph_editor", @@ -212,6 +213,9 @@ def all_libraries(module_to_name, members, documented): library("contrib.crf", "CRF (contrib)", tf.contrib.crf), library("contrib.distributions", "Statistical distributions (contrib)", tf.contrib.distributions), + library("contrib.distributions.bijector", + "Random variable transformations (contrib)", + tf.contrib.distributions.bijector), library("contrib.ffmpeg", "FFmpeg (contrib)", ffmpeg), library("contrib.framework", "Framework (contrib)", tf.contrib.framework), library("contrib.graph_editor", "Graph Editor (contrib)", diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 318e6568681..50f914d1f88 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -513,13 +513,17 @@ class Tensor(object): # ... ``` + This disallows ambiguities between testing the Python value vs testing the + dynamic condition of the `Tensor`. + Raises: `TypeError`. """ raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. " "Use `if t is not None:` instead of `if t:` to test if a " - "tensor is defined, and use the logical TensorFlow ops " - "to test the value of a tensor.") + "tensor is defined, and use TensorFlow ops such as " + "tf.cond to execute subgraphs conditioned on the value of " + "a tensor.") def __nonzero__(self): """Dummy method to prevent a tensor from being used as a Python `bool`. @@ -531,8 +535,9 @@ class Tensor(object): """ raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. " "Use `if t is not None:` instead of `if t:` to test if a " - "tensor is defined, and use the logical TensorFlow ops " - "to test the value of a tensor.") + "tensor is defined, and use TensorFlow ops such as " + "tf.cond to execute subgraphs conditioned on the value of " + "a tensor.") def eval(self, feed_dict=None, session=None): """Evaluates this tensor in a `Session`. @@ -2157,6 +2162,16 @@ class Graph(object): """ self._finalized = True + def _unsafe_unfinalize(self): + """Opposite of `finalize`. Internal interface. + + NOTE: Unfinalizing a graph could have negative impact on performance, + especially in a multi-threaded environment. Unfinalizing a graph + when it is in use by a Session may lead to undefined behavior. 
Ensure + that all sessions using a graph are closed before calling this method. + """ + self._finalized = False + def _get_control_flow_context(self): """Returns the current control flow context. diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 5bac51d34ac..34c6b326b4e 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -364,6 +364,10 @@ class CreateOpTest(test_util.TensorFlowTestCase): with self.assertRaises(RuntimeError): g.create_op("const", [], [dtypes.float32], None, name="myop1") + # Test unfinalize. + g._unsafe_unfinalize() + g.create_op("const", [], [dtypes.float32], None, name="myop1") + class ApplyOpTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index 5b56b547d6f..4ef4af4d550 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -53,5 +53,186 @@ class CholeskySolveGpuTest(CholeskySolveTest): _use_gpu = True +class EyeTest(tf.test.TestCase): + + def test_non_batch_2x2(self): + num_rows = 2 + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, dtype=dtype) + self.assertAllEqual((num_rows, num_rows), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_non_batch_2x3(self): + num_rows = 2 + num_columns = 3 + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, num_columns=num_columns, dtype=dtype) + self.assertAllEqual((num_rows, num_columns), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_1x3_batch_4x4(self): + num_rows = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, batch_shape=batch_shape, dtype=dtype) + self.assertAllEqual(batch_shape + [num_rows, num_rows], eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_4x4_dynamic(self): + num_rows = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + num_rows_ph = tf.placeholder(tf.int32) + batch_shape_ph = tf.placeholder(tf.int32) + eye = tf.eye( + num_rows_ph, + batch_shape=batch_shape_ph, + dtype=dtype) + eye_v = eye.eval( + feed_dict={ + num_rows_ph: num_rows, + batch_shape_ph: batch_shape}) + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_5x4(self): + num_rows = 5 + num_columns = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype) + self.assertAllEqual( + batch_shape + [num_rows, num_columns], + eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_5x4_dynamic(self): + num_rows = 5 + num_columns = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + num_rows_ph = tf.placeholder(tf.int32) + num_columns_ph = tf.placeholder(tf.int32) + batch_shape_ph = 
tf.placeholder(tf.int32) + eye = tf.eye( + num_rows_ph, + num_columns=num_columns_ph, + batch_shape=batch_shape_ph, + dtype=dtype) + eye_v = eye.eval( + feed_dict={ + num_rows_ph: num_rows, + num_columns_ph: num_columns, + batch_shape_ph: batch_shape}) + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_non_batch_0x0(self): + num_rows = 0 + dtype = np.int64 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, dtype=dtype) + self.assertAllEqual((num_rows, num_rows), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_non_batch_2x0(self): + num_rows = 2 + num_columns = 0 + dtype = np.int64 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, num_columns=num_columns, dtype=dtype) + self.assertAllEqual((num_rows, num_columns), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_non_batch_0x2(self): + num_rows = 0 + num_columns = 2 + dtype = np.int64 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, num_columns=num_columns, dtype=dtype) + self.assertAllEqual((num_rows, num_columns), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_1x3_batch_0x0(self): + num_rows = 0 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, batch_shape=batch_shape, dtype=dtype) + self.assertAllEqual((1, 3, 0, 0), eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_2x0(self): + num_rows = 2 + num_columns = 0 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype) + self.assertAllEqual( + batch_shape + [num_rows, num_columns], + eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_0x2(self): + num_rows = 0 + num_columns = 2 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype) + self.assertAllEqual( + batch_shape + [num_rows, num_columns], + eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index 7826800d0fb..465a02bfe69 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -354,11 +354,10 @@ class TileTest(tf.test.TestCase): bytes: (tf.string, bytes) } for dtype_np, (dtype_tf, cast) in types_to_test.items(): - with self.test_session(): + with self.test_session(use_gpu=True): inp = np.random.rand(4, 1).astype(dtype_np) - a = tf.constant([cast(x) for x in inp.ravel(order="C")], - shape=[4, 1], - dtype=dtype_tf) + a = tf.constant([cast(x) for x in inp.ravel(order="C")], shape=[4, 1], + dtype=dtype_tf) tiled = tf.tile(a, [1, 4]) result = 
tiled.eval() self.assertEqual(result.shape, (4, 4)) diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py index e87e8b02b23..29659d39c51 100644 --- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py +++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py @@ -539,6 +539,34 @@ class IndexedSlicesConditionalAccumulatorTest(tf.test.TestCase): val = sess.run(q.take_indexed_slices_grad(1)) self.assertAllEqual(val.dense_shape, [-1, 2, 2, 3]) + def testApplyGradtInt32IndicesAndShape(self): + with self.test_session() as sess: + q = tf.SparseConditionalAccumulator( + tf.float32, name="Q", shape=tf.TensorShape([3, 3])) + accum_op = q.apply_grad( + grad_indices=tf.constant( + [0, 2], dtype=tf.int32), + grad_values=tf.constant( + [[0, 0, 1], [3, 0, 4]], dtype=tf.float32), + grad_shape=tf.constant( + [3, 3], dtype=tf.int32)) + accum_op.run() + accum_op = q.apply_indexed_slices_grad( + tf.IndexedSlices( + indices=tf.constant( + [0, 2], dtype=tf.int32), + values=tf.constant( + [[0, 0, 1], [3, 0, 4]], dtype=tf.float32), + dense_shape=tf.constant( + [3, 3], dtype=tf.int32))) + accum_op.run() + self.assertEqual(q.num_accumulated().eval(), 2) + + val = sess.run(q.take_indexed_slices_grad(1)) + self.assertAllEqual(val.indices, [0, 2]) + self.assertAllEqual(val.values, [[0, 0, 1], [3, 0, 4]]) + self.assertAllEqual(val.dense_shape, [3, 3]) + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/python/kernel_tests/summary_tensor_op_test.py b/tensorflow/python/kernel_tests/summary_tensor_op_test.py index e65fd66f2e9..7ea8e6680be 100644 --- a/tensorflow/python/kernel_tests/summary_tensor_op_test.py +++ b/tensorflow/python/kernel_tests/summary_tensor_op_test.py @@ -38,12 +38,12 @@ class SummaryOpsTest(tf.test.TestCase): def testNodeNames(self): with self.test_session() as sess: c = tf.constant(1) - s1 = tf.summary.tensor_summary("", c, name="s1") + s1 = tf.summary.tensor_summary("s1", c) with tf.name_scope("foo"): - s2 = tf.summary.tensor_summary("", c, name="s2") + s2 = tf.summary.tensor_summary("s2", c) with tf.name_scope("zod"): - s3 = tf.summary.tensor_summary("", c, name="s3") - s4 = tf.summary.tensor_summary("", c) + s3 = tf.summary.tensor_summary("s3", c) + s4 = tf.summary.tensor_summary("TensorSummary", c) summ1, summ2, summ3, summ4 = sess.run([s1, s2, s3, s4]) v1 = self._SummarySingleValue(summ1) @@ -61,7 +61,7 @@ class SummaryOpsTest(tf.test.TestCase): def testScalarSummary(self): with self.test_session() as sess: const = tf.constant(10.0) - summ = tf.summary.tensor_summary("foo", const) + summ = tf.summary.scalar("foo", const) result = sess.run(summ) value = self._SummarySingleValue(result) diff --git a/tensorflow/python/lib/io/file_io.i b/tensorflow/python/lib/io/file_io.i index 55a9d503f3c..9688a9e02ad 100644 --- a/tensorflow/python/lib/io/file_io.i +++ b/tensorflow/python/lib/io/file_io.i @@ -176,10 +176,11 @@ tensorflow::io::BufferedInputStream* CreateBufferedInputStream( return nullptr; } std::unique_ptr input_stream( - new tensorflow::io::RandomAccessInputStream(file.release())); + new tensorflow::io::RandomAccessInputStream( + file.release(), true /* owns_file */)); std::unique_ptr buffered_input_stream( - new tensorflow::io::BufferedInputStream(input_stream.release(), - buffer_size)); + new tensorflow::io::BufferedInputStream( + input_stream.release(), buffer_size, true /* owns_input_stream */)); return buffered_input_stream.release(); } @@ 
-207,13 +208,6 @@ void AppendToFile(const string& file_content, tensorflow::WritableFile* file, } } -void FlushWritableFile(tensorflow::WritableFile* file, TF_Status* out_status) { - tensorflow::Status status = file->Flush(); - if (!status.ok()) { - Set_TF_Status_from_Status(out_status, status); - } -} - string ReadFromStream(tensorflow::io::BufferedInputStream* stream, size_t bytes, TF_Status* out_status) { @@ -226,14 +220,6 @@ string ReadFromStream(tensorflow::io::BufferedInputStream* stream, return result; } -void SeekInStream(tensorflow::io::BufferedInputStream* stream, int64 position, - TF_Status* out_status) { - tensorflow::Status status = stream->Seek(position); - if (!status.ok()) { - Set_TF_Status_from_Status(out_status, status); - } -} - %} // Ensure that the returned object is destroyed when its wrapper is @@ -266,24 +252,28 @@ tensorflow::WritableFile* CreateWritableFile(const string& filename, TF_Status* out_status); void AppendToFile(const string& file_content, tensorflow::WritableFile* file, TF_Status* out_status); -void FlushWritableFile(tensorflow::WritableFile* file, TF_Status* out_status); string ReadFromStream(tensorflow::io::BufferedInputStream* stream, size_t bytes, TF_Status* out_status); -void SeekInStream(tensorflow::io::BufferedInputStream* stream, int64 position, - TF_Status* out_status); + +%ignore tensorflow::Status::operator=; +%include "tensorflow/core/lib/core/status.h" %ignoreall %unignore tensorflow::io::BufferedInputStream; %unignore tensorflow::io::BufferedInputStream::~BufferedInputStream; %unignore tensorflow::io::BufferedInputStream::ReadLineAsString; +%unignore tensorflow::io::BufferedInputStream::Seek; %unignore tensorflow::io::BufferedInputStream::Tell; %unignore tensorflow::WritableFile; +%unignore tensorflow::WritableFile::Close; +%unignore tensorflow::WritableFile::Flush; %unignore tensorflow::WritableFile::~WritableFile; %include "tensorflow/core/platform/file_system.h" %include "tensorflow/core/lib/io/inputstream_interface.h" %include "tensorflow/core/lib/io/buffered_inputstream.h" %unignoreall +%include "tensorflow/c/tf_status_helper.h" %include "tensorflow/core/lib/io/path.h" -%include "tensorflow/core/platform/file_statistics.h" +%include "tensorflow/core/platform/file_statistics.h" \ No newline at end of file diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index a250903da8f..b8295dcb65a 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -114,7 +114,8 @@ class FileIO(object): """Seeks to the position in the file.""" self._preread_check() with errors.raise_exception_on_not_ok_status() as status: - return pywrap_tensorflow.SeekInStream(self._read_buf, position, status) + ret_status = self._read_buf.Seek(position) + pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) def readline(self): r"""Reads the next line from the file. Leaves the '\n' at the end.""" @@ -168,12 +169,16 @@ class FileIO(object): """ if self._writable_file: with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.FlushWritableFile(self._writable_file, status) + ret_status = self._writable_file.Flush() + pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) def close(self): """Closes FileIO. 
Should be called for the WritableFile to be flushed.""" self._read_buf = None - self.flush() + if self._writable_file: + with errors.raise_exception_on_not_ok_status() as status: + ret_status = self._writable_file.Close() + pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) self._writable_file = None diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index efaba32738b..cd4ccf35781 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -59,6 +59,15 @@ class FileIoTest(tf.test.TestCase): file_contents = f.read() self.assertEqual(b"begin\na1\na2\n", file_contents) + def testMultipleFiles(self): + file_prefix = os.path.join(self._base_dir, "temp_file") + for i in range(5000): + f = file_io.FileIO(file_prefix + str(i), mode="w+") + f.write("testing") + f.flush() + self.assertEquals(b"testing", f.read()) + f.close() + def testMultipleWrites(self): file_path = os.path.join(self._base_dir, "temp_file") with file_io.FileIO(file_path, mode="w") as f: @@ -358,6 +367,9 @@ class FileIoTest(tf.test.TestCase): self.assertEqual(0, f.tell()) self.assertEqual("testing1\n", f.readline()) + with self.assertRaises(errors.InvalidArgumentError): + f.seek(-1) + def testReadingIterator(self): file_path = os.path.join(self._base_dir, "temp_file") data = ["testing1\n", "testing2\n", "testing3\n", "\n", "testing5"] diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 5d2b43e8ebe..fd1c31f5ab1 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -2746,9 +2746,9 @@ def case(pred_fn_pairs, default, exclusive=False, name="case"): in `pred_fn_pairs` as well as `default` should return the same number and types of tensors. - If `exclusive==True`, all predicates are evaluated, and a logging operation - with an error is returned if more than one of the predicates evaluates to - True. If `exclusive==False`, execution stops are the first predicate which + If `exclusive==True`, all predicates are evaluated, and an exception is + thrown if more than one of the predicates evaluates to `True`. + If `exclusive==False`, execution stops are the first predicate which evaluates to True, and the tensors generated by the corresponding function are returned immediately. If none of the predicates evaluate to True, this operation returns the tensors generated by `default`. @@ -2792,7 +2792,7 @@ def case(pred_fn_pairs, default, exclusive=False, name="case"): pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a callable which returns a list of tensors. default: A callable that returns a list of tensors. - exclusive: True iff more than one predicate is allowed to evaluate to True. + exclusive: True iff at most one predicate is allowed to evaluate to `True`. name: A name for this operation (optional). 
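A minimal sketch of the `exclusive` semantics described in the `tf.case` docstring above, assuming the contemporaneous graph-mode API (`tf.case`, `tf.Session`); illustrative only, not part of the patch:

```python
import tensorflow as tf

x = tf.constant(2)

# Neither predicate is True for x == 2, so the default branch is returned.
# With exclusive=True every predicate is evaluated, and an error is raised at
# run time if more than one of them turns out to be True.
result = tf.case(
    {tf.less(x, 1): lambda: tf.constant(-1),
     tf.greater(x, 10): lambda: tf.constant(1)},
    default=lambda: tf.constant(0),
    exclusive=True)

with tf.Session() as sess:
    print(sess.run(result))  # prints 0
```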
Returns: diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 3d3149f640e..06d7308b384 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -1395,9 +1395,10 @@ class SparseConditionalAccumulator(ConditionalAccumulatorBase): return gen_data_flow_ops.sparse_accumulator_apply_gradient( self._accumulator_ref, local_step=local_step, - gradient_indices=grad_indices, + gradient_indices=math_ops.to_int64(grad_indices), gradient_values=grad_values, - gradient_shape=[] if grad_shape is None else grad_shape, + gradient_shape=math_ops.to_int64([] if grad_shape is None else + grad_shape), has_known_shape=(grad_shape is not None), name=name) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index e1f0ba51f8f..b55a8003622 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -139,7 +139,7 @@ def constant_initializer(value=0, dtype=dtypes.float32): >>> x = tf.get_variable('x', shape=[2, 3], initializer=init) ValueError: Too many elements provided. Needed at most 6, but received 8 - ``` + ``` """ def _initializer(shape, dtype=dtype, partition_info=None): return constant_op.constant(value, dtype=dtype, shape=shape) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 52740063b9e..da411044384 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import common_shapes +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops # go/tf-wildcard-import @@ -84,6 +86,65 @@ def cholesky_solve(chol, rhs, name=None): return x +def eye( + num_rows, + num_columns=None, + batch_shape=None, + dtype=dtypes.float32, + name=None): + """Construct an identity matrix, or a batch of matrices. + + ```python + # Construct one identity matrix. + tf.eye(2) + ==> [[1., 0.], + [0., 1.]] + + # Construct a batch of 3 identity matricies, each 2 x 2. + # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. + batch_identity = tf.eye(2, batch_shape=[3]) + + # Construct one 2 x 3 "identity" matrix + tf.eye(2, num_columns=3) + ==> [[ 1., 0., 0.], + [ 0., 1., 0.]] + ``` + + Args: + num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows + in each batch matrix. + num_columns: Optional non-negative `int32` scalar `Tensor` giving the number + of columns in each batch matrix. Defaults to `num_rows`. + batch_shape: `int32` `Tensor`. If provided, returned `Tensor` will have + leading batch dimensions of this shape. + dtype: The type of an element in the resulting `Tensor` + name: A name for this `Op`. Defaults to "eye". 
+ + Returns: + A `Tensor` of shape `batch_shape + [num_rows, num_columns]` + """ + with ops.name_scope( + name, default_name="eye", values=[num_rows, num_columns, batch_shape]): + + batch_shape = [] if batch_shape is None else batch_shape + batch_shape = ops.convert_to_tensor( + batch_shape, name="shape", dtype=dtypes.int32) + + if num_columns is None: + diag_size = num_rows + else: + diag_size = math_ops.minimum(num_rows, num_columns) + diag_shape = array_ops.concat(0, (batch_shape, [diag_size])) + diag_ones = array_ops.ones(diag_shape, dtype=dtype) + + if num_columns is None: + return array_ops.matrix_diag(diag_ones) + else: + shape = array_ops.concat(0, (batch_shape, [num_rows, num_columns])) + zero_matrix = array_ops.zeros(shape, dtype=dtype) + return array_ops.matrix_set_diag(zero_matrix, diag_ones) + + def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): r"""Solves one or more linear least-squares problems. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 4c61669d94a..23f141039a3 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -80,6 +80,7 @@ functions on matrices to your graph. @@trace @@transpose +@@eye @@matrix_diag @@matrix_diag_part @@matrix_band_part diff --git a/tensorflow/python/ops/summary_ops.py b/tensorflow/python/ops/summary_ops.py index 0feb456fe9a..d0c6fe8aa5e 100644 --- a/tensorflow/python/ops/summary_ops.py +++ b/tensorflow/python/ops/summary_ops.py @@ -18,13 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from google.protobuf import json_format +from tensorflow.core.framework import summary_pb2 from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops from tensorflow.python.ops import gen_logging_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_logging_ops import * - # pylint: enable=wildcard-import @@ -35,12 +36,11 @@ def _Collect(val, collections, default_collections): ops.add_to_collection(key, val) -def tensor_summary(display_name, # pylint: disable=invalid-name - tensor, - description="", - labels=None, - collections=None, - name=None): +def tensor_summary( # pylint: disable=invalid-name + name, + tensor, + summary_description=None, + collections=None): # pylint: disable=line-too-long """Outputs a `Summary` protocol buffer with a serialized tensor.proto. @@ -49,19 +49,12 @@ def tensor_summary(display_name, # pylint: disable=invalid-name has one summary value containing the input tensor. Args: - display_name: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. + name: A name for the generated node. Will also serve as the series name in + TensorBoard. tensor: A tensor of any type and shape to serialize. - description: An optional long description of the data being output. - labels: a list of strings used to specify how the data can be interpreted, - for example: - * `'encoding:image/jpg'` for a string tensor containing jpg images - * `'encoding:proto/X/Y/foo.proto'` for a string tensor containing Foos - * `'group:$groupName/$roleInGroup'` for a tensor that is related to - other tensors that are all in a group. (e.g. bounding boxes and images) + summary_description: Optional summary_pb2.SummaryDescription() collections: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. 
- name: An optional name for the generated node (optional). Returns: A scalar `Tensor` of type `string`. The serialized `Summary` protocol @@ -69,12 +62,14 @@ def tensor_summary(display_name, # pylint: disable=invalid-name """ # pylint: enable=line-too-long - with ops.name_scope(name, "TensorSummary", [tensor]) as scope: + if summary_description is None: + summary_description = summary_pb2.SummaryDescription() + + description = json_format.MessageToJson(summary_description) + with ops.name_scope(name, None, [tensor]) as scope: val = gen_logging_ops._tensor_summary( - display_name=display_name, tensor=tensor, description=description, - labels=labels, name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val diff --git a/tensorflow/python/platform/resource_loader.py b/tensorflow/python/platform/resource_loader.py index 5bcd054fd2d..3daf2d8729b 100644 --- a/tensorflow/python/platform/resource_loader.py +++ b/tensorflow/python/platform/resource_loader.py @@ -23,8 +23,6 @@ import inspect import os.path import sys -from tensorflow.python.platform import tf_logging as logging - def load_resource(path): """Load the resource at given path, where path is relative to tensorflow/. @@ -43,12 +41,8 @@ def load_resource(path): os.path.dirname(__file__), os.pardir, os.pardir)) path = os.path.join(tensorflow_root, path) path = os.path.abspath(path) - try: - with open(path, 'rb') as f: - return f.read() - except IOError as e: - logging.warning('IOError %s on path %s', e, path) - raise e + with open(path, 'rb') as f: + return f.read() # pylint: disable=protected-access diff --git a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py index 5d084a319f0..71cff17c6ac 100644 --- a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py +++ b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py @@ -29,13 +29,36 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tensorflow as tf from tensorflow.core.protobuf import meta_graph_pb2 +from tensorflow.python.lib.io import file_io from tensorflow.python.saved_model import builder as saved_model_builder from tensorflow.python.saved_model import constants from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import utils +from tensorflow.python.util import compat + + +def _write_assets(assets_directory, assets_filename): + """Writes asset files to be used with SavedModel for half plus two. + + Args: + assets_directory: The directory to which the assets should be written. + assets_filename: Name of the file to which the asset contents should be + written. + + Returns: + The path to which the assets file was written. + """ + if not file_io.file_exists(assets_directory): + file_io.recursive_create_dir(assets_directory) + + path = os.path.join( + compat.as_bytes(assets_directory), compat.as_bytes(assets_filename)) + file_io.write_string_to_file(path, "asset-file-contents") + return path def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): @@ -64,6 +87,17 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): x = tf.identity(tf_example["x"], name="x") y = tf.add(tf.mul(a, x), b, name="y") + # Create an assets file that can be saved and restored as part of the + # SavedModel. 
+ original_assets_directory = "/tmp/original/export/assets" + original_assets_filename = "foo.txt" + original_assets_filepath = _write_assets(original_assets_directory, + original_assets_filename) + + # Set up the assets collection. + assets_filepath = tf.constant(original_assets_filepath) + tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath) + # Set up the signature for regression with input and output tensor # specification. input_tensor = meta_graph_pb2.TensorInfo() @@ -84,16 +118,19 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): signature_def_map={ signature_constants.REGRESS_METHOD_NAME: signature_def - }) + }, + assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)) builder.save(as_text) def main(_): export_dir_pb = "/tmp/saved_model/half_plus_two" _generate_saved_model_for_half_plus_two(export_dir_pb) + print("SavedModel generated at: %s" % export_dir_pb) export_dir_pbtxt = "/tmp/saved_model/half_plus_two_pbtxt" _generate_saved_model_for_half_plus_two(export_dir_pbtxt, as_text=True) + print("SavedModel generated at: %s" % export_dir_pbtxt) if __name__ == "__main__": diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index 46e33aae271..b47e4e4e447 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -27,34 +27,25 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import ops +from tensorflow.core.framework import summary_pb2 from tensorflow.python.framework import tensor_shape from tensorflow.python.framework.dtypes import as_dtype from tensorflow.python.ops.summary_ops import tensor_summary from tensorflow.python.util.all_util import make_all -SCALAR_SUMMARY_LABEL = "tf_summary_type:scalar" - -def scalar(display_name, - tensor, - description="", - labels=None, - collections=None, - name=None): +def scalar(name, tensor, summary_description=None, collections=None): """Outputs a `Summary` protocol buffer containing a single scalar value. The generated Summary has a Tensor.proto containing the input Tensor. Args: - display_name: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. + name: A name for the generated node. Will also serve as the series name in + TensorBoard. tensor: A tensor containing a single floating point or integer value. - description: An optional long description of the data being output. - labels: a list of strings used to attach metadata. + summary_description: Optional summary_description_pb2.SummaryDescription collections: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. - name: An optional name for the generated node (optional). Returns: A scalar `Tensor` of type `string`. Which contains a `Summary` protobuf. @@ -62,7 +53,6 @@ def scalar(display_name, Raises: ValueError: If tensor has the wrong shape or type. """ - dtype = as_dtype(tensor.dtype) if dtype.is_quantized or not (dtype.is_integer or dtype.is_floating): raise ValueError("Can't create scalar summary for type %s." % dtype) @@ -71,17 +61,11 @@ def scalar(display_name, if not shape.is_compatible_with(tensor_shape.scalar()): raise ValueError("Can't create scalar summary for shape %s." 
% shape) - if labels is None: - labels = [] - else: - labels = labels[:] # Otherwise we would mutate the input argument + if summary_description is None: + summary_description = summary_pb2.SummaryDescription() + summary_description.type_hint = "scalar" - labels.append(SCALAR_SUMMARY_LABEL) - - with ops.name_scope(name, "ScalarSummary", [tensor]): - tensor = ops.convert_to_tensor(tensor) - return tensor_summary(display_name, tensor, description, labels, - collections, name) + return tensor_summary(name, tensor, summary_description, collections) __all__ = make_all(__name__) diff --git a/tensorflow/python/summary/summary_test.py b/tensorflow/python/summary/summary_test.py index fb49759ba0b..34c14dbd878 100644 --- a/tensorflow/python/summary/summary_test.py +++ b/tensorflow/python/summary/summary_test.py @@ -17,9 +17,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import six import tensorflow as tf +from google.protobuf import json_format +from tensorflow.core.framework import summary_pb2 from tensorflow.core.framework import types_pb2 @@ -64,24 +65,15 @@ class ScalarSummaryTest(tf.test.TestCase): with self.assertRaises(ValueError): tf.summary.scalar('3', c3) - def testLabelsAdded(self): - c = tf.constant(0) - - no_labels = tf.summary.scalar('2', c) - labels = tf.summary.scalar('1', c, labels=['foo']) - - def _GetLabels(n): - return n.op.get_attr('labels') - - expected_label = six.b(tf.summary.SCALAR_SUMMARY_LABEL) - self.assertEquals(_GetLabels(no_labels), [expected_label]) - self.assertEquals(_GetLabels(labels), [six.b('foo'), expected_label]) - def testTensorSummaryOpCreated(self): c = tf.constant(0) - s = tf.summary.scalar('', c) - self.assertEquals(s.op.type, 'TensorSummary') - self.assertEquals(s.op.inputs[0], c) + s = tf.summary.scalar('x', c) + self.assertEqual(s.op.type, 'TensorSummary') + self.assertEqual(s.op.inputs[0], c) + description = s.op.get_attr('description') + summary_description = summary_pb2.SummaryDescription() + json_format.Parse(description, summary_description) + self.assertEqual(summary_description.type_hint, 'scalar') if __name__ == '__main__': diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index 5e134777c02..e6e614c8bb6 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -45,6 +45,56 @@ from tensorflow.python.training.session_run_hook import SessionRunArgs from tensorflow.python.training.summary_io import SummaryWriterCache +class _SecondOrStepTimer(object): + """Timer that triggers at most once every N seconds or once every N steps. + """ + + def __init__(self, every_secs=None, every_steps=None): + self._every_secs = every_secs + self._every_steps = every_steps + self._last_triggered_step = None + self._last_triggered_time = None + + if self._every_secs is None and self._every_steps is None: + raise ValueError("Either every_secs or every_steps should be provided.") + if (self._every_secs is not None) and (self._every_steps is not None): + raise ValueError("Can not provide both every_secs and every_steps.") + + def should_trigger_for_step(self, step): + """Return true if the timer should trigger for the specified step. + + Args: + step: Training step to trigger on. 
+ + Returns: + True if the difference between the current time and the time of the last + trigger exceeds `every_secs`, or if the difference between the current + step and the last triggered step exceeds `every_steps`. False otherwise. + """ + if self._last_triggered_step == step: + return False + + if self._last_triggered_step is None: + return True + + if self._every_secs is not None: + if time.time() >= self._last_triggered_time + self._every_secs: + return True + + if self._every_steps is not None: + if step >= self._last_triggered_step + self._every_steps: + return True + + return False + + def update_last_triggered_step(self, step): + self._last_triggered_time = time.time() + self._last_triggered_step = step + + def last_triggered_step(self): + return self._last_triggered_step + + class LoggingTensorHook(session_run_hook.SessionRunHook): """Prints given tensors every N iteration. @@ -165,26 +215,17 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): self._summary_writer = SummaryWriterCache.get(checkpoint_dir) self._save_path = os.path.join(checkpoint_dir, checkpoint_basename) self._scaffold = scaffold - self._save_secs = save_secs - self._save_steps = save_steps - self._last_saved_time = None - self._last_saved_step = None - - if save_steps is None and save_secs is None: - raise ValueError("Either save_steps or save_secs should be provided") - if (save_steps is not None) and (save_secs is not None): - raise ValueError("Can not provide both save_steps and save_secs.") + self._timer = _SecondOrStepTimer(every_secs=save_secs, + every_steps=save_steps) def begin(self): - self._last_saved_time = None - self._last_saved_step = None self._global_step_tensor = training_util.get_global_step() if self._global_step_tensor is None: raise RuntimeError( "Global step should be created to use CheckpointSaverHook.") def before_run(self, run_context): # pylint: disable=unused-argument - if self._last_saved_time is None: + if self._timer.last_triggered_step() is None: # Write graph in the first call. 
training_util.write_graph( ops.get_default_graph().as_graph_def(add_shapes=True), @@ -202,28 +243,18 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): def after_run(self, run_context, run_values): global_step = run_values.results - if self._last_saved_time is None: + if self._timer.should_trigger_for_step(global_step): + self._timer.update_last_triggered_step(global_step) self._save(global_step, run_context.session) - if self._save_steps is not None: - if global_step >= self._last_saved_step + self._save_steps: - self._save(global_step, run_context.session) - - if self._save_secs is not None: - if time.time() >= self._last_saved_time + self._save_secs: - self._save(global_step, run_context.session) - def end(self, session): last_step = session.run(training_util.get_global_step()) - self._save(last_step, session) + if last_step != self._timer.last_triggered_step(): + self._save(last_step, session) def _save(self, step, session): """Saves the latest checkpoint.""" - if step == self._last_saved_step: - return logging.info("Saving checkpoints for %d into %s.", step, self._save_path) - self._last_saved_time = time.time() - self._last_saved_step = step if self._saver is None: self._scaffold.saver.save(session, self._save_path, global_step=step) else: @@ -320,6 +351,7 @@ class SummarySaverHook(session_run_hook.SessionRunHook): def __init__(self, save_steps=100, + save_secs=None, output_dir=None, summary_writer=None, scaffold=None, @@ -327,7 +359,9 @@ class SummarySaverHook(session_run_hook.SessionRunHook): """Initializes a `SummarySaver` monitor. Args: - save_steps: `int`, save summaries every N steps. See `EveryN`. + save_steps: `int`, save summaries every N steps. Exactly one of + `save_secs` and `save_steps` should be set. + save_secs: `int`, save summaries every N seconds. output_dir: `string`, the directory to save the summaries to. Only used if no `summary_writer` is supplied. summary_writer: `SummaryWriter`. If `None` and an `output_dir` was passed, @@ -337,24 +371,26 @@ class SummarySaverHook(session_run_hook.SessionRunHook): buffer, as output by TF summary methods like `scalar_summary` or `merge_all_summaries`. """ - # TODO(ipolosukhin): Implement every N seconds. self._summary_op = summary_op self._summary_writer = summary_writer if summary_writer is None and output_dir: self._summary_writer = SummaryWriterCache.get(output_dir) self._scaffold = scaffold - self._save_steps = save_steps + self._timer = _SecondOrStepTimer(every_secs=save_secs, + every_steps=save_steps) # TODO(mdan): Throw an error if output_dir and summary_writer are None. 
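The checkpoint and summary hooks above now share their save cadence via `_SecondOrStepTimer`. A small sketch of that trigger behavior in isolation, using the private class exactly as the new tests in this diff do (illustrative only; assumes nothing beyond the class definition added earlier in the patch):

```python
import time

from tensorflow.python.training import basic_session_run_hooks

# Step-based cadence: trigger at most once every 5 steps.
timer = basic_session_run_hooks._SecondOrStepTimer(every_steps=5)
for step in range(1, 13):
    if timer.should_trigger_for_step(step):
        timer.update_last_triggered_step(step)
        print("triggered at step", step)  # steps 1, 6 and 11

# Time-based cadence: trigger again only after every_secs have elapsed.
timer = basic_session_run_hooks._SecondOrStepTimer(every_secs=1.0)
timer.update_last_triggered_step(0)
time.sleep(1.0)
print(timer.should_trigger_for_step(1))  # True once a second has passed
```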
def begin(self): - self._last_saved_step = None - self._request_summary = True + self._next_step = None self._global_step_tensor = training_util.get_global_step() if self._global_step_tensor is None: raise RuntimeError( "Global step should be created to use SummarySaverHook.") def before_run(self, run_context): # pylint: disable=unused-argument + self._request_summary = ( + self._next_step is None or + self._timer.should_trigger_for_step(self._next_step)) requests = {"global_step": self._global_step_tensor} if self._request_summary: if self._summary_op is not None: @@ -371,18 +407,17 @@ class SummarySaverHook(session_run_hook.SessionRunHook): global_step = run_values.results["global_step"] - if self._last_saved_step is None: + if self._next_step is None: self._summary_writer.add_session_log( SessionLog(status=SessionLog.START), global_step) if self._request_summary: - self._last_saved_step = global_step + self._timer.update_last_triggered_step(global_step) if "summary" in run_values.results: self._summary_writer.add_summary(run_values.results["summary"], global_step) - self._request_summary = ( - global_step >= self._last_saved_step + self._save_steps - 1) + self._next_step = global_step + 1 def end(self, session=None): if self._summary_writer: diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index b18d2b7dac7..858f4bc1a87 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -19,16 +19,50 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import shutil import tempfile import time import tensorflow as tf +from tensorflow.contrib import testing +from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import monitored_session +class SecondOrStepTimerTest(tf.test.TestCase): + + def test_raise_in_both_secs_and_steps(self): + with self.assertRaises(ValueError): + basic_session_run_hooks._SecondOrStepTimer(every_secs=2.0, every_steps=10) + + def test_raise_in_none_secs_and_steps(self): + with self.assertRaises(ValueError): + basic_session_run_hooks._SecondOrStepTimer() + + def test_every_secs(self): + timer = basic_session_run_hooks._SecondOrStepTimer(every_secs=1.0) + self.assertTrue(timer.should_trigger_for_step(1)) + + timer.update_last_triggered_step(1) + self.assertFalse(timer.should_trigger_for_step(1)) + self.assertFalse(timer.should_trigger_for_step(2)) + + time.sleep(1.0) + self.assertFalse(timer.should_trigger_for_step(1)) + self.assertTrue(timer.should_trigger_for_step(2)) + + def test_every_steps(self): + timer = basic_session_run_hooks._SecondOrStepTimer(every_steps=3) + self.assertTrue(timer.should_trigger_for_step(1)) + + timer.update_last_triggered_step(1) + self.assertFalse(timer.should_trigger_for_step(1)) + self.assertFalse(timer.should_trigger_for_step(2)) + self.assertFalse(timer.should_trigger_for_step(3)) + self.assertTrue(timer.should_trigger_for_step(4)) + + class StopAtStepTest(tf.test.TestCase): def test_raise_in_both_last_step_and_num_steps(self): @@ -246,5 +280,144 @@ class CheckpointSaverHookTest(tf.test.TestCase): self.model_dir, self.global_step.name)) +class StepCounterHookTest(tf.test.TestCase): + + def setUp(self): + self.log_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.log_dir, ignore_errors=True) + + def test_step_counter(self): + with tf.Graph().as_default() as g, 
tf.Session() as sess: + global_step = tf.contrib.framework.get_or_create_global_step() + train_op = tf.assign_add(global_step, 1) + summary_writer = testing.FakeSummaryWriter(self.log_dir, g) + hook = tf.train.StepCounterHook( + summary_writer=summary_writer, every_n_steps=10) + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(30): + time.sleep(0.01) + mon_sess.run(train_op) + hook.end(sess) + summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_graph=g, + expected_summaries={}) + for step in [11, 21]: + summary_value = summary_writer.summaries[step][0].value[0] + self.assertTrue(summary_value.tag, 'global_step/sec') + # check at least 10 steps per sec is recorded. + self.assertGreater(summary_value.simple_value, 10) + + +class SummarySaverHookTest(tf.test.TestCase): + + def setUp(self): + tf.test.TestCase.setUp(self) + + self.log_dir = 'log/dir' + self.summary_writer = testing.FakeSummaryWriter(self.log_dir) + + var = tf.Variable(0.0) + tensor = tf.assign_add(var, 1.0) + self.summary_op = tf.scalar_summary('my_summary', tensor) + + global_step = tf.contrib.framework.get_or_create_global_step() + self.train_op = tf.assign_add(global_step, 1) + + def test_raise_in_both_secs_and_steps(self): + with self.assertRaises(ValueError): + tf.train.SummarySaverHook( + save_secs=10, + save_steps=20, + summary_writer=self.summary_writer) + + def test_raise_in_none_secs_and_steps(self): + with self.assertRaises(ValueError): + tf.train.SummarySaverHook( + save_secs=None, + save_steps=None, + summary_writer=self.summary_writer) + + def test_save_steps(self): + hook = tf.train.SummarySaverHook( + save_steps=8, + summary_writer=self.summary_writer, + summary_op=self.summary_op) + + with self.test_session() as sess: + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(30): + mon_sess.run(self.train_op) + hook.end(sess) + + self.summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_summaries={ + 1: {'my_summary': 1.0}, + 9: {'my_summary': 2.0}, + 17: {'my_summary': 3.0}, + 25: {'my_summary': 4.0}, + }) + + def test_save_secs_saving_once_every_step(self): + hook = tf.train.SummarySaverHook( + save_steps=None, + save_secs=0.5, + summary_writer=self.summary_writer, + summary_op=self.summary_op) + + with self.test_session() as sess: + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(4): + mon_sess.run(self.train_op) + time.sleep(0.5) + hook.end(sess) + + self.summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_summaries={ + 1: {'my_summary': 1.0}, + 2: {'my_summary': 2.0}, + 3: {'my_summary': 3.0}, + 4: {'my_summary': 4.0}, + }) + + def test_save_secs_saving_once_every_three_steps(self): + hook = tf.train.SummarySaverHook( + save_steps=None, + save_secs=0.9, + summary_writer=self.summary_writer, + summary_op=self.summary_op) + + with self.test_session() as sess: + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(8): + mon_sess.run(self.train_op) + time.sleep(0.3) + hook.end(sess) + + self.summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_summaries={ + 1: {'my_summary': 1.0}, + 4: {'my_summary': 2.0}, + 7: {'my_summary': 3.0}, 
+ }) + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/training/sync_replicas_optimizer_test.py b/tensorflow/python/training/sync_replicas_optimizer_test.py index 3025c9fb291..492adfc9b23 100644 --- a/tensorflow/python/training/sync_replicas_optimizer_test.py +++ b/tensorflow/python/training/sync_replicas_optimizer_test.py @@ -237,6 +237,9 @@ class SyncReplicasOptimizerV2Test(tf.test.TestCase): # The global step should have been updated since we only need to collect 2 # gradients. The variables should now have the new values after the average # of the gradients from worker 0/2 are applied. + while global_step.eval(session=sessions[1]) != 1: + time.sleep(0.01) + self.assertAllEqual(1, global_step.eval(session=sessions[1])) self.assertAllClose(0-(0.1+0.5)/2*2.0, var_0_g_1.eval(session=sessions[1])) self.assertAllClose(1-(0.9+1.3)/2*2.0, var_1_g_1.eval(session=sessions[1])) diff --git a/tensorflow/tensorboard/backend/handler.py b/tensorflow/tensorboard/backend/handler.py index ef484e36d59..2b1c65399e9 100644 --- a/tensorflow/tensorboard/backend/handler.py +++ b/tensorflow/tensorboard/backend/handler.py @@ -506,7 +506,8 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler): plugin = REGISTERED_PLUGINS[name]() # Initialize the plugin by passing the main http handler. plugin.initialize(self) - plugin_handlers = plugin.get_plugin_handlers(self._multiplexer.RunPaths()) + plugin_handlers = plugin.get_plugin_handlers(self._multiplexer.RunPaths(), + self._logdir) for route, handler in six.iteritems(plugin_handlers): path = DATA_PREFIX + PLUGIN_PREFIX + '/' + name + route data_handlers[path] = handler diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py index a3c7a790977..ddefa4f5948 100644 --- a/tensorflow/tensorboard/backend/server.py +++ b/tensorflow/tensorboard/backend/server.py @@ -68,8 +68,8 @@ def ParseEventFilesSpec(logdir): if logdir is None: return files for specification in logdir.split(','): - # If it's a gcs path, don't split on colon - if gcs.IsGCSPath(specification): + # If it's a gcs or hdfs path, don't split on colon + if gcs.IsGCSPath(specification) or specification.startswith('hdfs://'): run_name = None path = specification # If the spec looks like /foo:bar/baz, then we assume it's a path with a diff --git a/tensorflow/tensorboard/components/index.html b/tensorflow/tensorboard/components/index.html index 8765834fe6f..c790a76f753 100644 --- a/tensorflow/tensorboard/components/index.html +++ b/tensorflow/tensorboard/components/index.html @@ -28,12 +28,12 @@ limitations under the License. font-family: "RobotoDraft","Roboto",sans-serif; } - TensorBoard + diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html b/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html index 4b588f63231..090e74fbc7e 100644 --- a/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html +++ b/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html @@ -44,7 +44,7 @@ categories are exclusive.
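The `server.py` change above extends the colon-splitting exemption from GCS paths to `hdfs://` paths when parsing a `--logdir` specification. The helper below is a simplified, hypothetical restatement of that rule (it is not TensorBoard's `ParseEventFilesSpec`); it only shows why URL-style paths, whose authority component may itself contain a colon, must be passed through untouched:

```python
def split_logdir_spec(specification):
    """Split one comma-separated --logdir entry into (run_name, path)."""
    # URL-style paths carry their own colons (e.g. hdfs://namenode:8020/...),
    # so they are never split into a "run_name:path" pair.
    if specification.startswith("gs://") or specification.startswith("hdfs://"):
        return None, specification
    # A spec that starts with "/" is taken to be a bare path, even if it
    # contains a colon; otherwise a leading "name:" names the run.
    if ":" in specification and not specification.startswith("/"):
        run_name, _, path = specification.partition(":")
        return run_name, path
    return None, specification


print(split_logdir_spec("hdfs://namenode:8020/tensorboard/logs"))
# (None, 'hdfs://namenode:8020/tensorboard/logs')
print(split_logdir_spec("train:/tmp/logs/train"))
# ('train', '/tmp/logs/train')
print(split_logdir_spec("/foo:bar/baz"))
# (None, '/foo:bar/baz')
```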