diff --git a/BUILD b/BUILD
index e69de29bb2d..8b137891791 100644
--- a/BUILD
+++ b/BUILD
@@ -0,0 +1 @@
+
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 6c2582940ef..e8391c0f70a 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -29,6 +29,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "android_arm64",
+    values = {
+        "crosstool_top": "//external:android/crosstool",
+        "android_cpu": "arm64-v8a",
+    },
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "darwin",
     values = {"cpu": "darwin"},
@@ -95,6 +104,7 @@ filegroup(
         "//tensorflow/contrib/ffmpeg/default:all_files",
         "//tensorflow/contrib/framework:all_files",
         "//tensorflow/contrib/graph_editor:all_files",
+        "//tensorflow/contrib/grid_rnn:all_files",
         "//tensorflow/contrib/layers:all_files",
         "//tensorflow/contrib/layers/kernels:all_files",
         "//tensorflow/contrib/learn:all_files",
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 589f001b142..8dcd6a118bf 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -87,7 +87,7 @@ TEST(CApi, AllocateTensor) {
 static void TestEncodeDecode(int line,
                              const std::vector<tensorflow::string>& data) {
   const tensorflow::int64 n = data.size();
-  for (std::vector<tensorflow::int64> dims :
+  for (const std::vector<tensorflow::int64>& dims :
        std::vector<std::vector<tensorflow::int64>>{
            {n}, {1, n}, {n, 1}, {n / 2, 2}}) {
     // Create C++ Tensor
diff --git a/tensorflow/cc/tutorials/example_trainer.cc b/tensorflow/cc/tutorials/example_trainer.cc
index 27ff0914d5f..f2ecd2eddc2 100644
--- a/tensorflow/cc/tutorials/example_trainer.cc
+++ b/tensorflow/cc/tutorials/example_trainer.cc
@@ -37,7 +37,7 @@ namespace tensorflow {
 namespace example {
 
 struct Options {
-  int num_concurrent_sessions = 10;  // The number of concurrent sessions
+  int num_concurrent_sessions = 1;   // The number of concurrent sessions
   int num_concurrent_steps = 10;     // The number of concurrent steps
   int num_iterations = 100;          // Each step repeats this many times
   bool use_gpu = false;              // Whether to use gpu in the training
@@ -108,10 +108,11 @@ void ConcurrentSteps(const Options* opts, int session_index) {
 
   // Spawn M threads for M concurrent steps.
   const int M = opts->num_concurrent_steps;
-  thread::ThreadPool step_threads(Env::Default(), "trainer", M);
+  std::unique_ptr<thread::ThreadPool> step_threads(
+      new thread::ThreadPool(Env::Default(), "trainer", M));
 
   for (int step = 0; step < M; ++step) {
-    step_threads.Schedule([&session, opts, session_index, step]() {
+    step_threads->Schedule([&session, opts, session_index, step]() {
       // Randomly initialize the input.
       Tensor x(DT_FLOAT, TensorShape({2, 1}));
       auto x_flat = x.flat<float>();
@@ -139,12 +140,19 @@ void ConcurrentSteps(const Options* opts, int session_index) {
     });
   }
 
+  // Delete the threadpool, thus waiting for all threads to complete.
+  step_threads.reset(nullptr);
   TF_CHECK_OK(session->Close());
 }
 
 void ConcurrentSessions(const Options& opts) {
   // Spawn N threads for N concurrent sessions.
   const int N = opts.num_concurrent_sessions;
+
+  // At the moment our Session implementation only allows
+  // one concurrently computing Session on GPU.
+  CHECK_EQ(1, N) << "Currently can only have one concurrent session.";
+
   thread::ThreadPool session_threads(Env::Default(), "trainer", N);
   for (int i = 0; i < N; ++i) {
     session_threads.Schedule(std::bind(&ConcurrentSteps, &opts, i));
diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index 0000c889770..37ce250843c 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -23,6 +23,7 @@ cuda_py_test(
     srcs = ["python/kernel_tests/entropy_test.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -34,6 +35,7 @@ cuda_py_test(
     srcs = ["python/kernel_tests/monte_carlo_test.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -45,6 +47,7 @@ cuda_py_test(
     srcs = ["python/kernel_tests/special_math_test.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -56,6 +59,7 @@ cuda_py_test(
     srcs = ["python/kernel_tests/stochastic_graph_test.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -67,6 +71,7 @@ cuda_py_test(
     srcs = ["python/kernel_tests/variational_inference_test.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -78,6 +83,7 @@ cuda_py_test(
     srcs = ["python/kernel_tests/stochastic_tensor_test.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -89,6 +95,7 @@ cuda_py_test(
     srcs = ["examples/reinforce_simple/reinforce_simple_example.py"],
     additional_deps = [
         ":bayesflow_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/special_math_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/special_math_test.py
index 569cfb448c6..8bdea45efca 100644
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/special_math_test.py
+++ b/tensorflow/contrib/bayesflow/python/kernel_tests/special_math_test.py
@@ -159,6 +159,21 @@ class NdtrGradientTest(tf.test.TestCase):
   _use_log = False
   _grid = GridSpec(min=-100., max=100., shape=[1, 2, 3, 8])
 
+  def assert_all_true(self, v):
+    self.assertAllEqual(np.ones_like(v, dtype=np.bool), v)
+
+  def assert_all_false(self, v):
+    self.assertAllEqual(np.zeros_like(v, dtype=np.bool), v)
+
+  def _test_grad_finite(self, dtype):
+    with self.test_session():
+      x = tf.Variable([-100., 0., 100.], dtype=dtype)
+      output = (sm.log_ndtr(x) if self._use_log else sm.ndtr(x))
+      grad_output = tf.gradients(output, x)
+      tf.initialize_all_variables().run()
+      self.assert_all_true(np.isfinite(output.eval()))
+      self.assert_all_true(np.isfinite(grad_output[0].eval()))
+
   def _test_grads_are_positive(self, dtype, grid_spec):
     grid = tf.convert_to_tensor(_make_grid(dtype, grid_spec))
     with self.test_session():
@@ -169,20 +184,24 @@ class NdtrGradientTest(tf.test.TestCase):
       # grad_eval.shape = (N, N), with grad_eval[i, j] the partial derivative of
       # the ith output point w.r.t. the jth grid point.  We only expect the
       # diagonal to be nonzero.
+      # TODO(b/31131137): Replace tf.test.compute_gradient with our own custom
+      # gradient evaluation to ensure we correctly handle small function delta.
       grad_eval, _ = tf.test.compute_gradient(
           grid, grid_spec.shape, output, grid_spec.shape)
       grad_eval = np.diag(grad_eval)
 
       # Check for NaN separately in order to get informative failures.
-      self.assertFalse(np.isnan(grad_eval).any())
-      self.assertTrue((grad_eval > 0).all())
-      self.assertTrue(np.isfinite(grad_eval).all())
+      self.assert_all_false(np.isnan(grad_eval))
+      self.assert_all_true(grad_eval > 0.)
+      self.assert_all_true(np.isfinite(grad_eval))
 
   def test_float32(self):
     self._test_grads_are_positive(np.float32, self._grid)
+    self._test_grad_finite(np.float32)
 
   def test_float64(self):
     self._test_grads_are_positive(np.float64, self._grid)
+    self._test_grad_finite(np.float64)
 
 
 class LogNdtrGradientTest(NdtrGradientTest):
diff --git a/tensorflow/contrib/bayesflow/python/ops/special_math.py b/tensorflow/contrib/bayesflow/python/ops/special_math.py
index 2e65f1d31f4..750958aed54 100644
--- a/tensorflow/contrib/bayesflow/python/ops/special_math.py
+++ b/tensorflow/contrib/bayesflow/python/ops/special_math.py
@@ -174,13 +174,18 @@ def log_ndtr(x, series_order=3, name=None):
     # * We use one fixed series_order for all of 'x', rather than adaptive.
     # * Our docstring properly reflects that this is an asymptotic series, not a
     #   Tayor series.  We also provided a correct bound on the remainder.
-
+    # * We need to use the max/min in the _log_ndtr_lower arg to avoid nan when
+    #   x=0. This happens even though the branch is unchosen because when x=0
+    #   the gradient of a select involves the calculation 1*dy+0*(-inf)=nan
+    #   regardless of whether dy is finite. Note that the minimum is a NOP if
+    #   the branch is chosen.
     return math_ops.select(
         math_ops.greater(x, upper_segment),
         -_ndtr(-x),  # log(1-x) ~= -x, x << 1
         math_ops.select(math_ops.greater(x, lower_segment),
-                        math_ops.log(_ndtr(x)),
-                        _log_ndtr_lower(x, series_order)))
+                        math_ops.log(_ndtr(math_ops.maximum(x, lower_segment))),
+                        _log_ndtr_lower(math_ops.minimum(x, lower_segment),
+                                        series_order)))
 
 
 def _log_ndtr_lower(x, series_order):
diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD
index 1acc335f116..a00c050d1cf 100644
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -16,6 +16,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/operator_pd_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -27,6 +28,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/operator_pd_cholesky_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -38,6 +40,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/operator_pd_diag_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -49,6 +52,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/operator_pd_full_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -60,6 +64,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/operator_pd_identity_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -71,6 +76,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/operator_pd_vdvt_update_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -89,6 +95,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/bernoulli_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
 )
@@ -99,6 +106,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/beta_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
     tags = ["notsan"],  #http://b/31216497
@@ -110,6 +118,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/binomial_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
 )
@@ -120,6 +129,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/categorical_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -129,6 +139,7 @@ cuda_py_tests(
     name = "chi2_test",
     srcs = ["python/kernel_tests/chi2_test.py"],
     additional_deps = [
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -140,6 +151,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/dirichlet_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -151,6 +163,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/dirichlet_multinomial_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -161,6 +174,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/exponential_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -170,6 +184,7 @@ cuda_py_tests(
     name = "gamma_test",
     srcs = ["python/kernel_tests/gamma_test.py"],
     additional_deps = [
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -180,6 +195,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/inverse_gamma_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -190,6 +206,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/laplace_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -200,6 +217,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/multinomial_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -211,6 +229,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/mvn_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -222,6 +241,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/mixture_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -233,6 +253,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/normal_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -244,6 +265,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/poisson_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -255,6 +277,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/student_t_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -266,6 +289,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/uniform_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
     ],
 )
@@ -277,6 +301,7 @@ cuda_py_tests(
     additional_deps = [
         ":distributions_py",
         "//tensorflow/python:framework_test_lib",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
 )
@@ -286,6 +311,7 @@ cuda_py_tests(
     size = "small",
     srcs = ["python/kernel_tests/kullback_leibler_test.py"],
     additional_deps = [
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
 )
@@ -296,6 +322,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/normal_conjugate_posteriors_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
 )
@@ -306,6 +333,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/transformed_distribution_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:platform_test",
     ],
 )
@@ -316,6 +344,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/distribution_util_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -327,6 +356,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/shape_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -338,6 +368,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/bijector_test.py"],
     additional_deps = [
         ":distributions_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py b/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py
index 38517c7a015..222a2dcfc1c 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py
@@ -27,6 +27,14 @@ import tensorflow as tf
 
 class NormalTest(tf.test.TestCase):
 
+  def setUp(self):
+    self._rng = np.random.RandomState(123)
+
+  def assertAllFinite(self, tensor):
+    is_finite = np.isfinite(tensor.eval())
+    all_true = np.ones_like(is_finite, dtype=np.bool)
+    self.assertAllEqual(all_true, is_finite)
+
   def _testParamShapes(self, sample_shape, expected):
     with self.test_session():
       param_shapes = tf.contrib.distributions.Normal.param_shapes(sample_shape)
@@ -143,21 +151,94 @@ class NormalTest(tf.test.TestCase):
 
   def testNormalCDF(self):
     with self.test_session():
-      batch_size = 6
-      mu = tf.constant([3.0] * batch_size)
-      sigma = tf.constant([math.sqrt(10.0)] * batch_size)
-      x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32)
+      batch_size = 50
+      mu = self._rng.randn(batch_size)
+      sigma = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
 
       normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
-      expected_cdf = stats.norm(mu.eval(), sigma.eval()).cdf(x)
+      expected_cdf = stats.norm(mu, sigma).cdf(x)
 
       cdf = normal.cdf(x)
-      self.assertAllClose(expected_cdf, cdf.eval())
+      self.assertAllClose(expected_cdf, cdf.eval(), atol=0)
       self.assertAllEqual(normal.batch_shape().eval(), cdf.get_shape())
       self.assertAllEqual(normal.batch_shape().eval(), cdf.eval().shape)
       self.assertAllEqual(normal.get_batch_shape(), cdf.get_shape())
       self.assertAllEqual(normal.get_batch_shape(), cdf.eval().shape)
 
+  def testNormalSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      mu = self._rng.randn(batch_size)
+      sigma = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-8.0, 8.0, batch_size).astype(np.float64)
+
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
+      expected_sf = stats.norm(mu, sigma).sf(x)
+
+      sf = normal.survival_function(x)
+      self.assertAllClose(expected_sf, sf.eval(), atol=0)
+      self.assertAllEqual(normal.batch_shape().eval(), sf.get_shape())
+      self.assertAllEqual(normal.batch_shape().eval(), sf.eval().shape)
+      self.assertAllEqual(normal.get_batch_shape(), sf.get_shape())
+      self.assertAllEqual(normal.get_batch_shape(), sf.eval().shape)
+
+  def testNormalLogCDF(self):
+    with self.test_session():
+      batch_size = 50
+      mu = self._rng.randn(batch_size)
+      sigma = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-100.0, 10.0, batch_size).astype(np.float64)
+
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
+      expected_cdf = stats.norm(mu, sigma).logcdf(x)
+
+      cdf = normal.log_cdf(x)
+      self.assertAllClose(expected_cdf, cdf.eval(), atol=0, rtol=1e-5)
+      self.assertAllEqual(normal.batch_shape().eval(), cdf.get_shape())
+      self.assertAllEqual(normal.batch_shape().eval(), cdf.eval().shape)
+      self.assertAllEqual(normal.get_batch_shape(), cdf.get_shape())
+      self.assertAllEqual(normal.get_batch_shape(), cdf.eval().shape)
+
+  def testFiniteGradientAtDifficultPoints(self):
+    with self.test_session():
+      for dtype in [np.float32, np.float64]:
+        mu = tf.Variable(dtype(0.0))
+        sigma = tf.Variable(dtype(1.0))
+        dist = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
+        tf.initialize_all_variables().run()
+        for func in [
+            dist.cdf,
+            dist.log_cdf,
+            dist.survival_function,
+            dist.log_survival_function,
+            dist.log_prob,
+            dist.prob]:
+          x = np.array([-100., -20., -5., 0., 5., 20., 100.]).astype(dtype)
+          value = func(x)
+          grads = tf.gradients(value, [mu, sigma])
+
+          self.assertAllFinite(value)
+          self.assertAllFinite(grads[0])
+          self.assertAllFinite(grads[1])
+
+  def testNormalLogSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 50
+      mu = self._rng.randn(batch_size)
+      sigma = self._rng.rand(batch_size) + 1.0
+      x = np.linspace(-10.0, 100.0, batch_size).astype(np.float64)
+
+      normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma)
+      expected_sf = stats.norm(mu, sigma).logsf(x)
+
+      sf = normal.log_survival_function(x)
+      self.assertAllClose(expected_sf, sf.eval(), atol=0, rtol=1e-5)
+      self.assertAllEqual(normal.batch_shape().eval(), sf.get_shape())
+      self.assertAllEqual(normal.batch_shape().eval(), sf.eval().shape)
+      self.assertAllEqual(normal.get_batch_shape(), sf.get_shape())
+      self.assertAllEqual(normal.get_batch_shape(), sf.eval().shape)
+
   def testNormalEntropyWithScalarInputs(self):
     # Scipy.stats.norm cannot deal with the shapes in the other test.
     with self.test_session():
diff --git a/tensorflow/contrib/distributions/python/ops/distribution.py b/tensorflow/contrib/distributions/python/ops/distribution.py
index 76230055973..d180da6db89 100644
--- a/tensorflow/contrib/distributions/python/ops/distribution.py
+++ b/tensorflow/contrib/distributions/python/ops/distribution.py
@@ -540,6 +540,16 @@ class Distribution(BaseDistribution):
   def log_cdf(self, value, name="log_cdf"):
     """Log cumulative distribution function.
 
+    Given random variable `X`, the log cumulative distribution function is:
+
+    ```
+    log_cdf(x) := Log[ P[X <= x] ]
+    ```
+
+    Often, a numerical approximation can be used for `log_cdf(x)` that yields
+    a more accurate answer than simply taking the logarithm of the `cdf` when
+    `x << -1`.
+
     Args:
       value: `float` or `double` `Tensor`.
       name: The name to give this op.
@@ -556,6 +566,12 @@ class Distribution(BaseDistribution):
   def cdf(self, value, name="cdf"):
     """Cumulative distribution function.
 
+    Given random variable `X`, the cumulative distribution function `cdf` is:
+
+    ```
+    cdf(x) := P[X <= x]
+    ```
+
     Args:
       value: `float` or `double` `Tensor`.
       name: The name to give this op.
@@ -569,6 +585,57 @@ class Distribution(BaseDistribution):
       value = ops.convert_to_tensor(value, name="value")
       return self._cdf(value)
 
+  def log_survival_function(self, value, name="log_survival_function"):
+    """Log survival function.
+
+    Given random variable `X`, the log survival function is defined:
+
+    ```
+    log_survival_function(x) = Log[ P[X > x] ]
+                             = Log[ 1 - P[X <= x] ]
+                             = Log[ 1 - cdf(x) ]
+    ```
+
+    Typically, a dedicated numerical approximation of the log survival function
+    is more accurate than computing `Log[1 - cdf(x)]` directly when `x >> 1`.
+
+    Args:
+      value: `float` or `double` `Tensor`.
+      name: The name to give this op.
+
+    Returns:
+      `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+        `self.dtype`.
+    """
+    self._check_hasattr(self._log_survival_function)
+    with self._name_scope(name, values=[value]):
+      value = ops.convert_to_tensor(value, name="value")
+      return self._log_survival_function(value)
+
+  def survival_function(self, value, name="survival_function"):
+    """Survival function.
+
+    Given random variable `X`, the survival function is defined:
+
+    ```
+    survival_function(x) = P[X > x]
+                         = 1 - P[X <= x]
+                         = 1 - cdf(x).
+    ```
+
+    Args:
+      value: `float` or `double` `Tensor`.
+      name: The name to give this op.
+
+    Returns:
+      `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+        `self.dtype`.
+    """
+    self._check_hasattr(self._survival_function)
+    with self._name_scope(name, values=[value]):
+      value = ops.convert_to_tensor(value, name="value")
+      return self._survival_function(value)
+
   def entropy(self, name="entropy"):
     """Shanon entropy in nats."""
     self._check_hasattr(self._entropy)
diff --git a/tensorflow/contrib/distributions/python/ops/normal.py b/tensorflow/contrib/distributions/python/ops/normal.py
index eee6b8e3b5e..f4d59099990 100644
--- a/tensorflow/contrib/distributions/python/ops/normal.py
+++ b/tensorflow/contrib/distributions/python/ops/normal.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import math
 
+from tensorflow.contrib.bayesflow.python.ops import special_math
 from tensorflow.contrib.distributions.python.ops import distribution
 from tensorflow.contrib.distributions.python.ops import kullback_leibler
 from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util
@@ -169,20 +170,22 @@ class Normal(distribution.Distribution):
 
   def _log_prob(self, x):
     return (-0.5 * math.log(2. * math.pi) - math_ops.log(self.sigma)
-            -0.5 * math_ops.square((x - self.mu) / self.sigma))
+            -0.5 * math_ops.square(self._z(x)))
 
   def _prob(self, x):
     return math_ops.exp(self._log_prob(x))
 
   def _log_cdf(self, x):
-    return math_ops.log(self._cdf(x))
+    return special_math.log_ndtr(self._z(x))
 
   def _cdf(self, x):
-    # TODO(ebrevdo): wrap this in a Defun with a custom Defun
-    # gradient because the analytic gradient may be faster than
-    # automatic differentiation.
-    return (0.5 + 0.5*math_ops.erf(
-        1. / (math.sqrt(2.) * self.sigma) * (x - self.mu)))
+    return special_math.ndtr(self._z(x))
+
+  def _log_survival_function(self, x):
+    return special_math.log_ndtr(-self._z(x))
+
+  def _survival_function(self, x):
+    return special_math.ndtr(-self._z(x))
 
   def _entropy(self):
     # Use broadcasting rules to calculate the full broadcast sigma.
@@ -201,6 +204,11 @@ class Normal(distribution.Distribution):
   def _mode(self):
     return self._mean()
 
+  def _z(self, x):
+    """Standardize input `x` to a unit normal."""
+    with ops.name_scope("standardize", values=[x]):
+      return (x - self.mu) / self.sigma
+
 
 @kullback_leibler.RegisterKL(Normal, Normal)
 def _kl_normal_normal(n_a, n_b, name=None):
diff --git a/tensorflow/contrib/factorization/python/ops/gmm.py b/tensorflow/contrib/factorization/python/ops/gmm.py
index 3e4d1ede5ee..65c3c88c99a 100644
--- a/tensorflow/contrib/factorization/python/ops/gmm.py
+++ b/tensorflow/contrib/factorization/python/ops/gmm.py
@@ -30,6 +30,7 @@ from tensorflow.contrib.factorization.python.ops import gmm_ops
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators._sklearn import TransformerMixin
 from tensorflow.contrib.learn.python.learn.learn_io import data_feeder
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.control_flow_ops import with_dependencies
 
 
@@ -166,12 +167,17 @@ class GMM(estimator.Estimator, TransformerMixin):
         self.model_dir,
         gmm_ops.GmmAlgorithm.CLUSTERS_COVS_VARIABLE)
 
+  def _parse_tensor_or_dict(self, features):
+    if isinstance(features, dict):
+      return array_ops.concat(1, [features[k] for k in sorted(features.keys())])
+    return features
+
   def _get_train_ops(self, features, _):
     (_,
      _,
      losses,
      training_op) = gmm_ops.gmm(
-         features,
+         self._parse_tensor_or_dict(features),
          self._training_initial_clusters,
          self._num_clusters,
          self._random_seed,
@@ -187,7 +193,7 @@ class GMM(estimator.Estimator, TransformerMixin):
      model_predictions,
      _,
      _) = gmm_ops.gmm(
-         features,
+         self._parse_tensor_or_dict(features),
          self._training_initial_clusters,
          self._num_clusters,
          self._random_seed,
@@ -203,7 +209,7 @@ class GMM(estimator.Estimator, TransformerMixin):
      _,
      losses,
      _) = gmm_ops.gmm(
-         features,
+         self._parse_tensor_or_dict(features),
          self._training_initial_clusters,
          self._num_clusters,
          self._random_seed,
diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py
index fedf1a4add7..88cf5f084d8 100644
--- a/tensorflow/contrib/factorization/python/ops/kmeans.py
+++ b/tensorflow/contrib/factorization/python/ops/kmeans.py
@@ -28,6 +28,7 @@ from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators._sklearn import TransformerMixin
 from tensorflow.contrib.learn.python.learn.learn_io import data_feeder
 from tensorflow.contrib.learn.python.learn.monitors import BaseMonitor
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.control_flow_ops import with_dependencies
 
 SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE
@@ -222,12 +223,17 @@ class KMeansClustering(estimator.Estimator,
     """Returns cluster centers."""
     return tf.contrib.framework.load_variable(self.model_dir, self.CLUSTERS)
 
+  def _parse_tensor_or_dict(self, features):
+    if isinstance(features, dict):
+      return array_ops.concat(1, [features[k] for k in sorted(features.keys())])
+    return features
+
   def _get_train_ops(self, features, _):
     (_,
      _,
      losses,
      training_op) = clustering_ops.KMeans(
-         features,
+         self._parse_tensor_or_dict(features),
          self._num_clusters,
          self._training_initial_clusters,
          self._distance_metric,
@@ -245,7 +251,7 @@ class KMeansClustering(estimator.Estimator,
      model_predictions,
      _,
      _) = clustering_ops.KMeans(
-         features,
+         self._parse_tensor_or_dict(features),
          self._num_clusters,
          self._training_initial_clusters,
          self._distance_metric,
@@ -263,7 +269,7 @@ class KMeansClustering(estimator.Estimator,
      _,
      losses,
      _) = clustering_ops.KMeans(
-         features,
+         self._parse_tensor_or_dict(features),
          self._num_clusters,
          self._training_initial_clusters,
          self._distance_metric,
diff --git a/tensorflow/contrib/grid_rnn/BUILD b/tensorflow/contrib/grid_rnn/BUILD
index c3b9b5a9ddf..021f852e669 100644
--- a/tensorflow/contrib/grid_rnn/BUILD
+++ b/tensorflow/contrib/grid_rnn/BUILD
@@ -21,6 +21,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/grid_rnn_test.py"],
     additional_deps = [
         ":grid_rnn_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD
index a41c5efb6e9..a4619c5b163 100644
--- a/tensorflow/contrib/layers/BUILD
+++ b/tensorflow/contrib/layers/BUILD
@@ -127,6 +127,7 @@ py_test(
     name = "optimizers_test",
     srcs = ["python/layers/optimizers_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["manual"],  # http://b/31223979
     deps = [
         ":layers_py",
         "//tensorflow:tensorflow_py",
diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py
index 8dc0f6548b9..bb9d2954a99 100644
--- a/tensorflow/contrib/layers/python/layers/target_column.py
+++ b/tensorflow/contrib/layers/python/layers/target_column.py
@@ -181,7 +181,7 @@ class _TargetColumn(object):
                                      weight_tensor, shape=(-1,)))
     return weighted_loss
 
-  def training_loss(self, logits, target, features):
+  def training_loss(self, logits, target, features, name="training_loss"):
     """Returns training loss tensor for this head.
 
     Training loss is different from the loss reported on the tensorboard as we
@@ -197,6 +197,7 @@ class _TargetColumn(object):
       target: either a tensor for labels or in multihead case, a dict of string
         to target tensor.
       features: features dict.
+      name: Op name.
 
     Returns:
       Loss tensor.
@@ -206,10 +207,9 @@ class _TargetColumn(object):
 
     weight_tensor = self.get_weight_tensor(features)
     if weight_tensor is None:
-      return math_ops.reduce_mean(loss_unweighted, name="loss")
-    else:
-      loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
-      return math_ops.reduce_mean(loss_weighted, name="loss")
+      return math_ops.reduce_mean(loss_unweighted, name=name)
+    loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
+    return math_ops.reduce_mean(loss_weighted, name=name)
 
   def loss(self, logits, target, features):
     """Returns loss tensor for this head.
@@ -233,12 +233,11 @@ class _TargetColumn(object):
     weight_tensor = self.get_weight_tensor(features)
     if weight_tensor is None:
       return math_ops.reduce_mean(loss_unweighted, name="loss")
-    else:
-      loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
-      return math_ops.div(
-          math_ops.reduce_sum(loss_weighted),
-          math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
-          name="loss")
+    loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
+    return math_ops.div(
+        math_ops.reduce_sum(loss_weighted),
+        math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
+        name="loss")
 
 
 class _RegressionTargetColumn(_TargetColumn):
@@ -409,8 +408,10 @@ def _run_metrics(predictions, targets, metrics, weights):
   result = {}
   targets = math_ops.cast(targets, predictions.dtype)
   for name, metric in six.iteritems(metrics or {}):
-    result[name] = metrics_lib.run_metric(
-        metric, predictions, targets, weights=weights)
+    if weights is not None:
+      result[name] = metric(predictions, targets, weights=weights)
+    else:
+      result[name] = metric(predictions, targets)
 
   return result
 
diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD
index 6c41df1051b..f758ace4332 100644
--- a/tensorflow/contrib/learn/BUILD
+++ b/tensorflow/contrib/learn/BUILD
@@ -299,6 +299,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "metric_spec_test",
+    size = "small",
+    srcs = ["python/learn/tests/metric_spec_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":learn",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
 py_test(
     name = "experiment_test",
     size = "small",
diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py
index 34b4bf38239..2945934d3f4 100644
--- a/tensorflow/contrib/learn/python/learn/__init__.py
+++ b/tensorflow/contrib/learn/python/learn/__init__.py
@@ -35,12 +35,12 @@ from tensorflow.contrib.learn.python.learn.dataframe import *
 from tensorflow.contrib.learn.python.learn.estimators import *
 from tensorflow.contrib.learn.python.learn.evaluable import Evaluable
 from tensorflow.contrib.learn.python.learn.experiment import Experiment
-from tensorflow.contrib.learn.python.learn.monitors import NanLossDuringTrainingError
 from tensorflow.contrib.learn.python.learn.graph_actions import evaluate
 from tensorflow.contrib.learn.python.learn.graph_actions import infer
 from tensorflow.contrib.learn.python.learn.graph_actions import run_feeds
 from tensorflow.contrib.learn.python.learn.graph_actions import run_n
 from tensorflow.contrib.learn.python.learn.graph_actions import train
 from tensorflow.contrib.learn.python.learn.learn_io import *
+from tensorflow.contrib.learn.python.learn.monitors import NanLossDuringTrainingError
 from tensorflow.contrib.learn.python.learn.trainable import Trainable
 # pylint: enable=wildcard-import
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
index 5394584bf01..ca2065b278a 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py
@@ -79,8 +79,6 @@ class DNNClassifier(dnn_linear_combined.DNNLinearCombinedClassifier):
       Both features' `value` must be a `SparseTensor`.
     - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
       whose `value` is a `Tensor`.
-    - if `feature_columns` is `None`, then `input` must contain only real
-      valued `Tensor`.
   """
 
   def __init__(self,
@@ -211,8 +209,6 @@ class DNNRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
       Both features' `value` must be a `SparseTensor`.
     - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
       whose `value` is a `Tensor`.
-    - if `feature_columns` is `None`, then `input` must contain only real
-      valued `Tensor`.
   """
 
   def __init__(self,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py
index 8e99595c472..d81d99f672c 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py
@@ -253,9 +253,11 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
     logits = array_ops.reshape(
         array_ops.tile(centered_bias[0], [batch_size]),
         [batch_size, self._target_column.num_label_columns])
-    training_loss = self._target_column.training_loss(logits, targets, features)
-    # Learn central bias by an optimizer. 0.1 is a convervative lr for a single
-    # variable.
+    with ops.name_scope(None, "centered_bias", (targets, features)):
+      training_loss = self._target_column.training_loss(
+          logits, targets, features)
+    # Learn central bias by an optimizer. 0.1 is a conservative lr for a
+    # single variable.
     return training.AdagradOptimizer(0.1).minimize(
         training_loss, var_list=centered_bias)
 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py
index 9e3a6dfd7fb..768e8045e6b 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py
@@ -223,10 +223,13 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
         linear_feature_columns=[tf.contrib.layers.real_valued_column('x')],
         dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')],
         dnn_hidden_units=[3, 3])
-
-    classifier.fit(input_fn=_input_fn_train, steps=100)
-    scores = classifier.evaluate(input_fn=_input_fn_eval,
-                                 steps=100)
+    classifier.fit(input_fn=_input_fn_train, steps=100, monitors=(
+        tf.contrib.learn.monitors.CaptureVariable(var_name='loss'),
+        tf.contrib.learn.monitors.CaptureVariable(
+            var_name='centered_bias/training_loss'),
+        tf.contrib.learn.monitors.CaptureVariable(var_name='training_loss'),
+    ))
+    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=100)
     # If there is no weight column, model should learn y=Not(x). All examples in
     # eval data set are y=x. So if weight column is ignored, then accuracy
     # should be zero.
@@ -251,8 +254,12 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase):
         linear_feature_columns=[tf.contrib.layers.real_valued_column('x')],
         dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')],
         dnn_hidden_units=[3, 3])
-
-    classifier.fit(input_fn=_input_fn_train, steps=100)
+    classifier.fit(input_fn=_input_fn_train, steps=100, monitors=(
+        tf.contrib.learn.monitors.CaptureVariable(var_name='loss'),
+        tf.contrib.learn.monitors.CaptureVariable(
+            var_name='centered_bias/training_loss'),
+        tf.contrib.learn.monitors.CaptureVariable(var_name='training_loss'),
+    ))
     scores = classifier.evaluate(input_fn=_input_fn_train, steps=100)
     # If weight column is ignored, then accuracy should be 0.25. If it's not
     # ignored, then it should be greater than 0.6.
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index 28d27b545cf..d33346b4f64 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -37,6 +37,7 @@ from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
 from tensorflow.contrib.learn.python.learn import evaluable
 from tensorflow.contrib.learn.python.learn import graph_actions
+from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import monitors as monitor_lib
 from tensorflow.contrib.learn.python.learn import session_run_hook
 from tensorflow.contrib.learn.python.learn import trainable
@@ -52,7 +53,6 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import device_setter
 from tensorflow.python.training import saver
@@ -174,6 +174,76 @@ def _get_replica_device_setter(config):
     return None
 
 
+def _make_metrics_ops(metrics, features, targets, predictions):
+  """Add metrics to run on features, targets, and predictions dicts or tensors.
+
+  `metrics` contains a specification for how to run metrics. It is a dict
+  mapping friendly names to either `MetricSpec` objects, or directly to a metric
+  function (assuming that predictions and targets are single tensors). A key
+  may also be a `(name, pred_name)` tuple, in which case `metric` is called
+  with `predictions[pred_name]` and `targets[pred_name]` (else `targets`).
+
+  Users are encouraged to use `MetricSpec` objects, which are more flexible and
+  cleaner. They also lead to clearer errors.
+
+  Args:
+    metrics: A dict mapping names to metrics specification, for example
+      `MetricSpec` objects.
+    features: A dict of tensors returned from an input_fn as features/inputs.
+    targets: A single tensor or a dict of tensors returned from an input_fn as
+      labels.
+    predictions: A single tensor or a dict of tensors output from a model as
+      predictions.
+
+  Returns:
+    A dict mapping the friendly names given in `metrics` to the result of
+    calling the given metric function.
+
+  Raises:
+    ValueError: If metrics specifications do not work with the type of
+      features/targets/predictions provided. Mostly, a dict is given but no
+      pred_name specified.
+  """
+  metrics = metrics or {}
+  if isinstance(targets, dict) and len(targets) == 1:
+    # Unpack single target into just tensor.
+    targets = targets[list(targets.keys())[0]]
+  result = {}
+  for name, metric in six.iteritems(metrics):
+    if isinstance(metric, metric_spec.MetricSpec):
+      result[name] = metric.create_metric_ops(features, targets, predictions)
+      continue
+
+    # TODO(b/31229024): Remove the rest of this loop
+    logging.warning('Please specify metrics using MetricSpec. Using bare '
+                    'functions or (key, fn) tuples is deprecated and support '
+                    'for it will be removed on Oct 1, 2016.')
+
+    if isinstance(name, tuple):
+      # Multi-head metrics.
+      if not isinstance(predictions, dict):
+        raise ValueError(
+            'Metrics passed provide (name, prediction), '
+            'but predictions are not dict. '
+            'Metrics: %s, Predictions: %s.' % (metrics, predictions))
+      # Here are two options: targets are single Tensor or a dict.
+      if isinstance(targets, dict) and name[1] in targets:
+        # If targets are dict and the prediction name is in it, apply metric.
+        result[name[0]] = metric(predictions[name[1]], targets[name[1]])
+      else:
+        # Otherwise pass the targets to the metric.
+        result[name[0]] = metric(predictions[name[1]], targets)
+    else:
+      # Single head metrics.
+      if isinstance(predictions, dict):
+        raise ValueError(
+            'Metrics passed provide only name, no prediction, '
+            'but predictions are dict. '
+            'Metrics: %s, Targets: %s.' % (metrics, targets))
+      result[name] = metric(predictions, targets)
+  return result
+
+
 class BaseEstimator(
     sklearn.BaseEstimator, evaluable.Evaluable, trainable.Trainable):
   """Abstract BaseEstimator class to train and evaluate TensorFlow models.
@@ -389,7 +459,7 @@ class BaseEstimator(
       'The signature of the input_fn accepted by export is changing to be '
       'consistent with what\'s used by tf.Learn Estimator\'s train/evaluate. '
       'input_fn and input_feature_key will become required args, '
-      'and use_deprecated_input_fn will default to False &  be removed '
+      'and use_deprecated_input_fn will default to False and be removed '
       'altogether.',
       use_deprecated_input_fn=True,
       input_fn=None,
@@ -470,15 +540,14 @@ class BaseEstimator(
     Args:
       features: `Tensor` or `dict` of `Tensor` objects.
       targets: `Tensor` or `dict` of `Tensor` objects.
-      metrics: Dict of metric ops to run. If None, the default metric functions
-        are used; if {}, no metrics are used. If model has one output (i.e.,
-        returning single predction), keys are `str`, e.g. `'accuracy'` - just a
-        name of the metric that will show up in the logs / summaries.
-        Otherwise, keys are tuple of two `str`, e.g. `('accuracy', 'classes')`
-        - name of the metric and name of `Tensor` in the predictions to run
-        this metric on. Metric ops should support streaming, e.g., returning
+      metrics: Dict of metrics to run. If None, the default metric functions
+        are used; if {}, no metrics are used. Otherwise, `metrics` should map
+        friendly names for the metric to a `MetricSpec` object defining which
+        model outputs to evaluate against which targets with which metric
+        function. Metric ops should support streaming, e.g., returning
         update_op and value tensors. See more details in
-        ../../../../metrics/python/metrics/ops/streaming_metrics.py.
+        `../../../../metrics/python/metrics/ops/streaming_metrics.py` and
+        `../metric_spec.py`.
 
     Returns:
       metrics: `dict` of `Tensor` objects.
@@ -782,8 +851,7 @@ class Estimator(BaseEstimator):
                model_fn=None,
                model_dir=None,
                config=None,
-               params=None,
-               weight_column_name=None):
+               params=None):
     """Constructs an Estimator instance.
 
     Args:
@@ -795,7 +863,7 @@ class Estimator(BaseEstimator):
           * `(features, targets, mode) -> (predictions, loss, train_op)`
           * `(features, targets, mode, params) -> (predictions, loss, train_op)`
 
-      Where
+        Where
 
           * `features` are single `Tensor` or `dict` of `Tensor`s
                  (depending on data passed to `fit`),
@@ -816,9 +884,6 @@ class Estimator(BaseEstimator):
       config: Configuration object.
       params: `dict` of hyper parameters that will be passed into `model_fn`.
               Keys are names of parameters, values are basic python types.
-      weight_column_name: A string defining feature column name representing
-        weights. It is used to down weight or boost examples during training. It
-        will be multiplied by the loss of the example.
 
     Raises:
       ValueError: parameters of `model_fn` don't match `params`.
@@ -831,17 +896,10 @@ class Estimator(BaseEstimator):
         raise ValueError('Estimator\'s model_fn (%s) has less than 4 '
                          'arguments, but not None params (%s) are passed.' %
                          (model_fn, params))
-      if (params is None and weight_column_name is None and
-          'params' in model_fn_args):
+      if params is None and 'params' in model_fn_args:
         logging.warning('Estimator\'s model_fn (%s) has includes params '
                         'argument, but params are not passed to Estimator.',
                         model_fn)
-    self.weight_column_name = weight_column_name
-    if weight_column_name is not None:
-      if params is None:
-        params = {'weight_column_name': weight_column_name}
-      else:
-        params['weight_column_name'] = weight_column_name
     self._model_fn = model_fn
     self.params = params
 
@@ -855,11 +913,6 @@ class Estimator(BaseEstimator):
         return self._model_fn(features, targets, mode=mode)
     return self._model_fn(features, targets)
 
-  def _get_weight_tensor(self, features):
-    if not self.weight_column_name:
-      return None
-    return math_ops.to_float(features[self.weight_column_name])
-
   def _get_train_ops(self, features, targets):
     """Method that builds model graph and returns trainer ops.
 
@@ -887,15 +940,14 @@ class Estimator(BaseEstimator):
     Args:
       features: `Tensor` or `dict` of `Tensor` objects.
       targets: `Tensor` or `dict` of `Tensor` objects.
-      metrics: Dict of metric ops to run. If None, the default metric functions
-        are used; if {}, no metrics are used. If model has one output (i.e.,
-        returning single predction), keys are `str`, e.g. `'accuracy'` - just a
-        name of the metric that will show up in the logs / summaries.
-        Otherwise, keys are tuple of two `str`, e.g. `('accuracy', 'classes')`
-        - name of the metric and name of `Tensor` in the predictions to run
-        this metric on. Metric ops should support streaming, e.g., returning
+      metrics: Dict of metrics to run. If None, the default metric functions
+        are used; if {}, no metrics are used. Otherwise, `metrics` should map
+        friendly names for the metric to a `MetricSpec` object defining which
+        model outputs to evaluate against which targets with which metric
+        function. Metric ops should support streaming, e.g., returning
         update_op and value tensors. See more details in
-        ../../../../metrics/python/metrics/ops/streaming_metrics.py.
+        `../../../../metrics/python/metrics/ops/streaming_metrics.py` and
+        `../metric_spec.py`.
 
     Returns:
       metrics: `dict` of `Tensor` objects.
@@ -905,38 +957,7 @@ class Estimator(BaseEstimator):
     """
     predictions, loss, _ = self._call_model_fn(features, targets, ModeKeys.EVAL)
     result = {'loss': metrics_lib.streaming_mean(loss)}
-
-    weights = self._get_weight_tensor(features)
-    metrics = metrics or {}
-    if isinstance(targets, dict) and len(targets) == 1:
-      # Unpack single target into just tensor.
-      targets = targets[list(targets.keys())[0]]
-    for name, metric in six.iteritems(metrics):
-      if isinstance(name, tuple):
-        # Multi-head metrics.
-        if not isinstance(predictions, dict):
-          raise ValueError(
-              'Metrics passed provide (name, prediction), '
-              'but predictions are not dict. '
-              'Metrics: %s, Predictions: %s.' % (metrics, predictions))
-        # Here are two options: targets are single Tensor or a dict.
-        if isinstance(targets, dict) and name[1] in targets:
-          # If targets are dict and the prediction name is in it, apply metric.
-          result[name[0]] = metrics_lib.run_metric(
-              metric, predictions[name[1]], targets[name[1]], weights)
-        else:
-          # Otherwise pass the targets to the metric.
-          result[name[0]] = metrics_lib.run_metric(
-              metric, predictions[name[1]], targets, weights)
-      else:
-        # Single head metrics.
-        if isinstance(predictions, dict):
-          raise ValueError(
-              'Metrics passed provide only name, no prediction, '
-              'but predictions are dict. '
-              'Metrics: %s, Targets: %s.' % (metrics, targets))
-        result[name] = metrics_lib.run_metric(
-            metric, predictions, targets, weights)
+    result.update(_make_metrics_ops(metrics, features, targets, predictions))
     return result
 
   def _get_predict_ops(self, features):
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
index 68e8888e7e2..3ed5d9318af 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py
@@ -44,16 +44,6 @@ def boston_input_fn(num_epochs=None):
   return features, target
 
 
-def boston_input_with_weight_fn():
-  boston = tf.contrib.learn.datasets.load_boston()
-  features = {}
-  features['data'] = tf.reshape(
-      tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM])
-  target = tf.reshape(tf.constant(boston.target), [-1, 1])
-  features['weight'] = tf.mul(0.5, tf.ones(target.get_shape()))
-  return features, target
-
-
 def iris_input_fn():
   iris = tf.contrib.learn.datasets.load_iris()
   features = tf.reshape(tf.constant(iris.data), [-1, _IRIS_INPUT_DIM])
@@ -92,42 +82,6 @@ def linear_model_fn(features, target, mode):
   return prediction, loss, train_op
 
 
-def linear_model_with_weights_fn(features, target, mode):
-  assert mode in ('train', 'eval', 'infer')
-  prediction, loss = (
-      tf.contrib.learn.models.linear_regression_zero_init(
-          features['data'], target)
-  )
-  train_op = tf.contrib.layers.optimize_loss(
-      loss, tf.contrib.framework.get_global_step(), optimizer='Adagrad',
-      learning_rate=0.1)
-  return prediction, loss, train_op
-
-
-def linear_model_with_weights_and_params_fn(features, target, mode, params):
-  assert mode in ('train', 'eval', 'infer')
-  prediction, loss = (
-      tf.contrib.learn.models.linear_regression_zero_init(
-          features['data'], target)
-  )
-  train_op = tf.contrib.layers.optimize_loss(
-      loss, tf.contrib.framework.get_global_step(), optimizer='Adagrad',
-      learning_rate=params['learning_rate'])
-  return prediction, loss, train_op
-
-
-def squared_error_weighted_sum(predictions, targets, weights=None):
-  squared_error = tf.to_float(tf.square(predictions - targets))
-  if weights is None:
-    return tf.reduce_sum(squared_error)
-  else:
-    return tf.reduce_sum(tf.mul(squared_error, weights))
-
-
-def squared_error_no_weight(predictions, targets):
-  return squared_error_weighted_sum(predictions, targets)
-
-
 def logistic_model_no_mode_fn(features, target):
   target = tf.one_hot(target, 3, 1, 0)
   prediction, loss = (
@@ -384,40 +338,6 @@ class EstimatorTest(tf.test.TestCase):
     with self.assertRaises(ValueError):
       est.fit(input_fn=other_input_fn, steps=1)
 
-  def testEstimatorWithWeight(self):
-    est = tf.contrib.learn.Estimator(model_fn=linear_model_with_weights_fn,
-                                     weight_column_name='weight')
-    self.assertTrue(est.params is not None)
-    self.assertTrue('weight_column_name' in est.params)
-    est.fit(input_fn=boston_input_with_weight_fn, steps=100)
-    scores = est.evaluate(
-        input_fn=boston_input_with_weight_fn, steps=100,
-        metrics={'SEWS': squared_error_weighted_sum,
-                 'SE': squared_error_no_weight})
-    self.assertNear(scores['SEWS']*2, scores['SE'], 0.01)
-
-  def testEstimatorWithWeightAndParams(self):
-    est = tf.contrib.learn.Estimator(
-        model_fn=linear_model_with_weights_and_params_fn,
-        params={'learning_rate': 0.01},
-        weight_column_name='weight')
-    self.assertTrue('weight_column_name' in est.params)
-    est.fit(input_fn=boston_input_with_weight_fn, steps=100)
-    scores = est.evaluate(
-        input_fn=boston_input_with_weight_fn, steps=100,
-        metrics={'SEWS': squared_error_weighted_sum,
-                 'SE': squared_error_no_weight})
-    self.assertNear(scores['SEWS']*2, scores['SE'], 0.01)
-
-  def testEstimatorWithNoWeight(self):
-    est = tf.contrib.learn.Estimator(model_fn=linear_model_with_weights_fn)
-    est.fit(input_fn=boston_input_with_weight_fn, steps=100)
-    scores = est.evaluate(
-        input_fn=boston_input_with_weight_fn, steps=100,
-        metrics={'SEWS': squared_error_weighted_sum,
-                 'SE': squared_error_no_weight})
-    self.assertNear(scores['SEWS'], scores['SE'], 0.01)
-
   def testMonitors(self):
     est = tf.contrib.learn.Estimator(model_fn=linear_model_fn)
     est.fit(input_fn=boston_input_fn,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py
index c21368bacbb..7dd0546775a 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py
@@ -32,6 +32,7 @@ from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 from tensorflow.contrib.layers.python.layers import target_column
 from tensorflow.contrib.learn.python.learn import evaluable
+from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import trainable
 from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
 from tensorflow.contrib.learn.python.learn.estimators import estimator
@@ -70,7 +71,7 @@ def _wrap_metric(metric):
     targets = math_ops.cast(targets, preds.dtype)
     return metric(preds, targets)
 
-  def wrapped_weights(preds, targets, weights):
+  def wrapped_weights(preds, targets, weights=None):
     targets = math_ops.cast(targets, preds.dtype)
     if weights is not None:
       weights = array_ops.reshape(math_ops.to_float(weights), shape=(-1,))
@@ -264,6 +265,7 @@ def sdca_classifier_model_fn(features, targets, mode, params):
   loss = None
   if mode != estimator.ModeKeys.INFER:
     loss = math_ops.reduce_mean(loss_fn(logits, targets), name="loss")
+    logging_ops.scalar_summary("loss", loss)
 
   train_op = None
   if mode == estimator.ModeKeys.TRAIN:
@@ -347,8 +349,6 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
       Both features' `value` must be a `SparseTensor`.
     - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
       whose `value` is a `Tensor`.
-    - if `feature_columns` is `None`, then `input` must contains only real
-      valued `Tensor`.
   """
 
   def __init__(self,
@@ -426,8 +426,7 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
         model_fn=model_fn,
         model_dir=self._model_dir,
         config=config,
-        params=params,
-        weight_column_name=weight_column_name)
+        params=params)
 
   def get_estimator(self):
     return self._estimator
@@ -445,14 +444,24 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable):
     """See evaluable.Evaluable."""
     if not metrics:
       metrics = {}
-      metrics[("accuracy", _CLASSES)] = metrics_lib.streaming_accuracy
+      metrics["accuracy"] = metric_spec.MetricSpec(
+          metric_fn=metrics_lib.streaming_accuracy,
+          prediction_key=_CLASSES)
     if self._n_classes == 2:
       additional_metrics = (
           target_column.get_default_binary_metrics_for_eval([0.5]))
-      additional_metrics = {(name, _LOGISTIC): metric
-                            for name, metric in additional_metrics.items()}
+      additional_metrics = {
+          name: metric_spec.MetricSpec(metric_fn=metric,
+                                       prediction_key=_LOGISTIC)
+          for name, metric in additional_metrics.items()
+      }
       metrics.update(additional_metrics)
+
+    # TODO(b/31229024): Remove this loop
     for metric_name, metric in metrics.items():
+      if isinstance(metric, metric_spec.MetricSpec):
+        continue
+
       if isinstance(metric_name, tuple):
         if len(metric_name) != 2:
           raise ValueError("Ignoring metric %s. It returned a tuple with len  "
@@ -577,8 +586,6 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor):
          key=weight column name, value=a `SparseTensor`}
     - if isinstance(column, `RealValuedColumn`):
         key=column.name, value=a `Tensor`
-    - if `feature_columns` is `None`:
-        input must contains only real valued `Tensor`.
   """
 
   def __init__(self,
diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py
index 05d2f45b068..e8ce4e54615 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py
@@ -26,6 +26,7 @@ import numpy as np
 import tensorflow as tf
 
 from tensorflow.contrib.learn.python.learn.estimators import _sklearn
+from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
 
 
 def _iris_input_fn():
@@ -137,8 +138,8 @@ class LinearClassifierTest(tf.test.TestCase):
 
     def _input_fn_train():
       # Create 4 rows, one of them (y = x), three of them (y=Not(x))
-      target = tf.constant([[1], [0], [0], [0]])
-      features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),}
+      target = tf.constant([[1], [0], [0], [0]], dtype=tf.float32)
+      features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)}
       return features, target
 
     def _my_metric_op(predictions, targets):
@@ -155,9 +156,14 @@ class LinearClassifierTest(tf.test.TestCase):
         input_fn=_input_fn_train,
         steps=100,
         metrics={
-            ('my_accuracy', 'classes'): tf.contrib.metrics.streaming_accuracy,
-            ('my_precision', 'classes'): tf.contrib.metrics.streaming_precision,
-            ('my_metric', 'probabilities'): _my_metric_op
+            'my_accuracy': MetricSpec(
+                metric_fn=tf.contrib.metrics.streaming_accuracy,
+                prediction_key='classes'),
+            'my_precision': MetricSpec(
+                metric_fn=tf.contrib.metrics.streaming_precision,
+                prediction_key='classes'),
+            'my_metric': MetricSpec(metric_fn=_my_metric_op,
+                                    prediction_key='probabilities')
         })
     self.assertTrue(
         set(['loss', 'my_accuracy', 'my_precision', 'my_metric'
diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py
index 0d4455986b6..b82a8c03666 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/svm.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py
@@ -25,13 +25,12 @@ from tensorflow.contrib import layers
 from tensorflow.contrib import metrics as metrics_lib
 from tensorflow.contrib.layers.python.layers import target_column
 from tensorflow.contrib.learn.python.learn import evaluable
+from tensorflow.contrib.learn.python.learn import metric_spec
 from tensorflow.contrib.learn.python.learn import trainable
 from tensorflow.contrib.learn.python.learn.estimators import estimator
 from tensorflow.contrib.learn.python.learn.estimators import linear
 from tensorflow.contrib.learn.python.learn.utils import checkpoints
 from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
 
 
 def _as_iterable(preds, output):
@@ -47,21 +46,6 @@ def _get_metric_args(metric):
             if arg not in metric.keywords.keys()]
 
 
-def _wrap_metric(metric):
-  """Wraps metrics for mismatched prediction/target types."""
-  def wrapped(preds, targets):
-    targets = math_ops.cast(targets, preds.dtype)
-    return metric(preds, targets)
-
-  def wrapped_weights(preds, targets, weights):
-    targets = math_ops.cast(targets, preds.dtype)
-    if weights is not None:
-      weights = array_ops.reshape(math_ops.to_float(weights), shape=(-1,))
-    return metric(preds, targets, weights)
-
-  return wrapped_weights if "weights" in _get_metric_args(metric) else wrapped
-
-
 class SVM(trainable.Trainable, evaluable.Evaluable):
   """Support Vector Machine (SVM) model for binary classification.
 
@@ -100,9 +84,6 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
         whose `value` is a `SparseTensor`.
       - if `column` is a `RealValuedColumn, a feature with `key=column.name`
         whose `value` is a `Tensor`.
-      - if `feature_columns` is None, then `input` must contains only real
-        valued `Tensor`.
-
 
   Parameters:
     example_id_column: A string defining the feature column name representing
@@ -166,15 +147,24 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
                batch_size=None, steps=None, metrics=None, name=None):
     """See evaluable.Evaluable."""
     if not metrics:
-      metrics = {
-          ("accuracy", linear._CLASSES): metrics_lib.streaming_accuracy,
-      }
+      metrics = {}
+      metrics["accuracy"] = metric_spec.MetricSpec(
+          metric_fn=metrics_lib.streaming_accuracy,
+          prediction_key=linear._CLASSES)
     additional_metrics = (
         target_column.get_default_binary_metrics_for_eval([0.5]))
-    additional_metrics = {(name, linear._LOGISTIC): metric
-                          for name, metric in additional_metrics.items()}
+    additional_metrics = {
+        name: metric_spec.MetricSpec(metric_fn=metric,
+                                     prediction_key=linear._LOGISTIC)
+        for name, metric in additional_metrics.items()
+    }
     metrics.update(additional_metrics)
+
+    # TODO(b/31229024): Remove this loop
     for metric_name, metric in metrics.items():
+      if isinstance(metric, metric_spec.MetricSpec):
+        continue
+
       if isinstance(metric_name, tuple):
         if len(metric_name) != 2:
           raise ValueError("Ignoring metric %s. It returned a tuple with len  "
@@ -184,7 +174,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable):
         if metric_name[1] not in valid_keys:
           raise ValueError("Ignoring metric %s. The 2nd element of its name "
                            "should be in %s" % (metric_name, valid_keys))
-      metrics[metric_name] = _wrap_metric(metric)
+      metrics[metric_name] = linear._wrap_metric(metric)
     return self._estimator.evaluate(x=x, y=y, input_fn=input_fn,
                                     feed_fn=feed_fn, batch_size=batch_size,
                                     steps=steps, metrics=metrics, name=name)
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
new file mode 100644
index 00000000000..dcae57d18e8
--- /dev/null
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -0,0 +1,186 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""The metric spec class to flexibly connect models and metrics."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.platform import tf_logging as logging
+
+
+class MetricSpec(object):
+  """MetricSpec connects a model to metric functions.
+
+  The MetricSpec class contains all information necessary to connect the
+  output of a `model_fn` to the metrics (usually, streaming metrics) that are
+  used in evaluation.
+
+  It is passed in the `metrics` argument of `Estimator.evaluate`. The
+  `Estimator` then knows which predictions, labels, and weight to use to call a
+  given metric function.
+
+  When building the ops to run in evaluation, `Estimator` will call
+  `create_metric_ops`, which will connect the given `metric_fn` to the model
+  as detailed in the docstring for `create_metric_ops`, and return the metric.
+
+  Example:
+
+    Assuming a model has an input function which returns inputs containing
+    (among other things) a tensor with key "income", and a labels dictionary
+    containing "has_clicked". Let's assume that the `model_fn` for this model
+    returns a prediction with key "clicked".
+
+    In order to compute the accuracy of the "clicked" prediction, we would add
+    ```
+    "click accuracy": MetricSpec(metric_fn=streaming_accuracy,
+                                 prediction_key="clicked",
+                                 label_key="has_clicked")
+    ```
+    to the metrics argument to `evaluate`. If we would like the accuracy to be
+    weighted by "income", we can add that as the `weight_key` argument.
+    ```
+    "click accuracy": MetricSpec(metric_fn=streaming_accuracy,
+                                 prediction_key="clicked",
+                                 label_key="has_clicked",
+                                 weight_key="income")
+    ```
+  """
+
+  def __init__(self,
+               metric_fn,
+               prediction_key=None,
+               label_key=None,
+               weight_key=None):
+    """Constructor.
+
+    Creates a MetricSpec.
+
+    Args:
+      metric_fn: A function to use as a metric. Must accept `predictions`,
+        `labels` and optionally, `weights` tensors as inputs, and must return
+        either a single tensor which is interpreted as a value of this metric,
+        or a pair `(value_op, update_op)`, where value_op is the op to call to
+        obtain the value of the metric, and update_op should be evaluated for
+        each batch in order to update internal state.
+      prediction_key: The key for a tensor in the `predictions` dict (output
+        from the `model_fn`) to use as the `predictions` input to the
+        `metric_fn`. Optional. If `None`, the `model_fn` must return a single
+        tensor or a dict with only a single entry as `predictions`.
+      label_key: The key for a tensor in the `labels` dict (output from the
+        `input_fn`) to use as the `labels` input to the `metric_fn`.
+        Optional. If `None`, the `input_fn` must return a single tensor or a
+        dict with only a single entry as `labels`.
+      weight_key: The key for a tensor in the `inputs` dict (output from the
+        `input_fn`) to use as the `weights` input to the `metric_fn`.
+        Optional. If `None`, no weights will be passed to the `metric_fn`.
+    """
+    self._metric_fn = metric_fn
+    self._prediction_key = prediction_key
+    self._label_key = label_key
+    self._weight_key = weight_key
+
+  @property
+  def prediction_key(self):
+    return self._prediction_key
+
+  @property
+  def label_key(self):
+    return self._label_key
+
+  @property
+  def weight_key(self):
+    return self._weight_key
+
+  @property
+  def metric_fn(self):
+    return self._metric_fn
+
+  def __str__(self):
+    # Callables such as functools.partial objects have no `__name__`.
+    fn_name = getattr(self.metric_fn, '__name__', repr(self.metric_fn))
+    return ('MetricSpec(metric_fn=%s, prediction_key=%s, label_key=%s, '
+            'weight_key=%s)' % (fn_name, self.prediction_key,
+                                self.label_key, self.weight_key))
+
+  def create_metric_ops(self, inputs, labels, predictions):
+    """Connect our `metric_fn` to the specified members of the given dicts.
+
+    This function will call the `metric_fn` given in our constructor as follows:
+    ```
+      metric_fn(predictions[self.prediction_key],
+                labels[self.label_key],
+                weights=inputs[self.weight_key])
+    ```
+    And returns the result. The `weights` argument is only passed if
+    `self.weight_key` is not `None`.
+
+    `predictions` and `labels` may be single tensors as well as dicts. If
+    `predictions` is a single tensor, `self.prediction_key` must be `None`. If
+    `predictions` is a single element dict, `self.prediction_key` is allowed to
+    be `None`. Conversely, if `labels` is a single tensor, `self.label_key` must
+    be `None`. If `labels` is a single element dict, `self.label_key` is allowed
+    to be `None`.
+
+    Args:
+      inputs: A dict of inputs produced by the `input_fn`
+      labels: A dict of labels or a single label tensor produced by the
+        `input_fn`.
+      predictions: A dict of predictions or a single tensor produced by the
+        `model_fn`.
+
+    Returns:
+      The result of calling `metric_fn`.
+
+    Raises:
+      ValueError: If `predictions` or `labels` is a single `Tensor` and
+        `self.prediction_key` or `self.label_key` is not `None`; or if
+        `self.label_key` is `None` but `labels` is a dict with more than one
+        element, or if `self.prediction_key` is `None` but `predictions` is a
+        dict with more than one element.
+    """
+    def _get_dict(name, dict_or_tensor, key):
+      """Get a single tensor or an element of a dict or raise ValueError."""
+      if key:
+        if not isinstance(dict_or_tensor, dict):
+          raise ValueError('MetricSpec with ' + name + '_key specified'
+                           ' requires ' +
+                           name + 's dict, got %s' % dict_or_tensor)
+        return dict_or_tensor[key]
+      else:
+        if isinstance(dict_or_tensor, dict):
+          if len(dict_or_tensor) != 1:
+            raise ValueError('MetricSpec without specified ' + name + '_key'
+                             ' requires ' + name + 's tensor or single element'
+                             ' dict, got %s' % dict_or_tensor)
+          return list(dict_or_tensor.values())[0]  # py3: view, not list
+        else:
+          return dict_or_tensor
+
+    # Get the predictions
+    prediction = _get_dict('prediction', predictions, self.prediction_key)
+
+    # Get the labels
+    label = _get_dict('label', labels, self.label_key)
+
+    try:
+      if self.weight_key:
+        return self.metric_fn(prediction, label,
+                              weights=inputs[self.weight_key])
+      else:
+        return self.metric_fn(prediction, label)
+    except:  # pylint: disable=bare-except
+      logging.error('Could not create metric ops for %s.' % self)
+      raise
diff --git a/tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py b/tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py
new file mode 100644
index 00000000000..63d2e2a1f57
--- /dev/null
+++ b/tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py
@@ -0,0 +1,150 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for MetricSpec."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
+
+
+def test_metric(predictions, labels, weights=None):
+  return (predictions, labels, weights)  # Echo the inputs back to the caller.
+
+
+class MetricSpecTest(tf.test.TestCase):
+  """Checks which tensors create_metric_ops hands to the metric function."""
+
+  def test_create_metric_ops(self):
+    """All three keys select the matching entries of their dicts."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = {"label1": "label1_tensor", "label2": "label2_tensor"}
+    predictions = {"pred1": "pred1_tensor", "pred2": "pred2_tensor"}
+    spec = MetricSpec(metric_fn=test_metric,
+                      prediction_key="pred1",
+                      label_key="label1",
+                      weight_key="feature2")
+    prediction, label, weight = spec.create_metric_ops(
+        features, labels, predictions)
+    self.assertEqual(prediction, "pred1_tensor")
+    self.assertEqual(label, "label1_tensor")
+    self.assertEqual(weight, "feature2_tensor")
+
+  def test_no_weight(self):
+    """Without a weight_key, `weights` stays at the metric's default."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = {"label1": "label1_tensor", "label2": "label2_tensor"}
+    predictions = {"pred1": "pred1_tensor", "pred2": "pred2_tensor"}
+    spec = MetricSpec(metric_fn=test_metric, prediction_key="pred1",
+                      label_key="label1")
+
+    prediction, label, weight = spec.create_metric_ops(
+        features, labels, predictions)
+    self.assertEqual(prediction, "pred1_tensor")
+    self.assertEqual(label, "label1_tensor")
+    self.assertEqual(weight, None)
+
+  def test_fail_no_prediction(self):
+    """A multi-entry predictions dict needs an explicit prediction_key."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = {"label1": "label1_tensor", "label2": "label2_tensor"}
+    predictions = {"pred1": "pred1_tensor", "pred2": "pred2_tensor"}
+    spec = MetricSpec(metric_fn=test_metric,
+                      label_key="label1",
+                      weight_key="feature2")
+    with self.assertRaisesRegexp(
+        ValueError,
+        "MetricSpec without specified prediction_key "
+        "requires predictions tensor or single element dict, got"):
+      spec.create_metric_ops(features, labels, predictions)
+
+  def test_fail_no_label(self):
+    """A multi-entry labels dict needs an explicit label_key."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = {"label1": "label1_tensor", "label2": "label2_tensor"}
+    predictions = {"pred1": "pred1_tensor", "pred2": "pred2_tensor"}
+    spec = MetricSpec(metric_fn=test_metric, prediction_key="pred1",
+                      weight_key="feature2")
+    with self.assertRaisesRegexp(
+        ValueError,
+        "MetricSpec without specified label_key requires "
+        "labels tensor or single element dict, got"):
+      spec.create_metric_ops(features, labels, predictions)
+
+  def test_single_prediction(self):
+    """A bare predictions value is used when prediction_key is omitted."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = {"label1": "label1_tensor", "label2": "label2_tensor"}
+    predictions = "pred1_tensor"
+    spec = MetricSpec(metric_fn=test_metric,
+                      label_key="label1",
+                      weight_key="feature2")
+
+    prediction, label, weight = spec.create_metric_ops(
+        features, labels, predictions)
+    self.assertEqual(prediction, "pred1_tensor")
+    self.assertEqual(label, "label1_tensor")
+    self.assertEqual(weight, "feature2_tensor")
+
+  def test_single_label(self):
+    """A bare labels value is used when label_key is omitted."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = "label1_tensor"
+    predictions = {"pred1": "pred1_tensor", "pred2": "pred2_tensor"}
+    spec = MetricSpec(metric_fn=test_metric,
+                      prediction_key="pred1",
+                      weight_key="feature2")
+
+    prediction, label, weight = spec.create_metric_ops(
+        features, labels, predictions)
+    self.assertEqual(prediction, "pred1_tensor")
+    self.assertEqual(label, "label1_tensor")
+    self.assertEqual(weight, "feature2_tensor")
+
+  def test_fail_single_prediction(self):
+    """A prediction_key cannot be combined with a bare predictions value."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = {"label1": "label1_tensor", "label2": "label2_tensor"}
+    predictions = "pred1_tensor"
+    spec = MetricSpec(metric_fn=test_metric, prediction_key="pred1",
+                      label_key="label1", weight_key="feature2")
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "MetricSpec with prediction_key specified requires "
+        "predictions dict, got"):
+      spec.create_metric_ops(features, labels, predictions)
+
+  def test_fail_single_label(self):
+    """A label_key cannot be combined with a bare labels value."""
+    features = {"feature1": "feature1_tensor", "feature2": "feature2_tensor"}
+    labels = "label1_tensor"
+    predictions = {"pred1": "pred1_tensor", "pred2": "pred2_tensor"}
+    spec = MetricSpec(metric_fn=test_metric, prediction_key="pred1",
+                      label_key="label1", weight_key="feature2")
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        "MetricSpec with label_key specified requires "
+        "labels dict, got"):
+      spec.create_metric_ops(features, labels, predictions)
+
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index db8a2f91ee3..1b04a70d16e 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -367,6 +367,7 @@ class SdcaModel(object):
 
     logging_ops.scalar_summary('approximate_duality_gap',
                                self.approximate_duality_gap())
+    logging_ops.scalar_summary('examples_seen', self._hashtable.size())
 
   def _symmetric_l1_regularization(self):
     return self._options['symmetric_l1_regularization']
diff --git a/tensorflow/contrib/losses/python/losses/__init__.py b/tensorflow/contrib/losses/python/losses/__init__.py
index 8f7c5a0181e..350ae23ce44 100644
--- a/tensorflow/contrib/losses/python/losses/__init__.py
+++ b/tensorflow/contrib/losses/python/losses/__init__.py
@@ -116,6 +116,7 @@ weighted average over the individual prediction errors:
 @@mean_squared_error
 @@sigmoid_cross_entropy
 @@softmax_cross_entropy
+@@sparse_softmax_cross_entropy
 
 The following are deprecated in favor of `mean_pairwise_squared_error` and
 `mean_squared_error`.
diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py
index 777e0d37176..0238935ebc8 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops.py
@@ -41,6 +41,7 @@ __all__ = ["absolute_difference",
            "mean_squared_error",
            "sigmoid_cross_entropy",
            "softmax_cross_entropy",
+           "sparse_softmax_cross_entropy",
            "sum_of_pairwise_squares",
            "sum_of_squares"]
 
@@ -354,8 +355,8 @@ def softmax_cross_entropy(logits, onehot_labels, weight=1.0,
     A scalar `Tensor` representing the loss value.
 
   Raises:
-    ValueError: If the shape of `predictions` doesn't match that of `targets` or
-      if the shape of `weight` is invalid or if `weight` is None.
+    ValueError: If the shape of `logits` doesn't match that of `onehot_labels`
+      or if the shape of `weight` is invalid or if `weight` is None.
   """
   with ops.name_scope(scope, "softmax_cross_entropy_loss",
                       [logits, onehot_labels]):
@@ -375,6 +376,39 @@ def softmax_cross_entropy(logits, onehot_labels, weight=1.0,
     return _compute_weighted_loss(losses, weight)
 
 
+def sparse_softmax_cross_entropy(logits, labels, weight=1.0, scope=None):
+  """Weighted loss built on tf.nn.sparse_softmax_cross_entropy_with_logits.
+
+  `weight` scales the loss. A scalar `weight` multiplies the whole loss by
+  that value, while a `weight` tensor of size [`batch_size`] assigns one
+  coefficient to each sample. Labels are reshaped to rank 1 and `weight` is
+  squeezed before the per-sample losses are computed.
+
+  Args:
+    logits: [batch_size, num_classes] logits outputs of the network.
+    labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or
+      `int64` in the range `[0, num_classes)`.
+    weight: Coefficients for the loss. The tensor must be a scalar or a tensor
+      of shape [batch_size] or [batch_size, 1].
+    scope: The scope for the operations performed in computing the loss.
+
+  Returns:
+    A scalar `Tensor` representing the loss value.
+
+  Raises:
+    ValueError: If the shapes of logits, labels, and weight are incompatible, or
+      if `weight` is None.
+  """
+  with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss",
+                      [logits, labels]):
+    batch_size = array_ops.shape(labels)[0]
+    flat_labels = array_ops.reshape(labels, shape=[batch_size])
+    squeezed_weight = array_ops.squeeze(weight)
+    per_sample_losses = nn.sparse_softmax_cross_entropy_with_logits(
+        logits, flat_labels, name="xentropy")
+    return _compute_weighted_loss(per_sample_losses, squeezed_weight)
+
+
 def log_loss(predictions, targets, weight=1.0, epsilon=1e-7, scope=None):
   """Adds a Log Loss term to the training procedure.
 
diff --git a/tensorflow/contrib/losses/python/losses/loss_ops_test.py b/tensorflow/contrib/losses/python/losses/loss_ops_test.py
index b746416b2e3..1e2fb14d709 100644
--- a/tensorflow/contrib/losses/python/losses/loss_ops_test.py
+++ b/tensorflow/contrib/losses/python/losses/loss_ops_test.py
@@ -173,7 +173,7 @@ class SoftmaxCrossEntropyLossTest(tf.test.TestCase):
       loss = tf.contrib.losses.softmax_cross_entropy(logits, labels, weight)
       self.assertAlmostEqual(loss.eval(), (1.2 + 3.4 + 5.6) * 10.0 / 3.0, 3)
 
-  def testAllWrongAllMissing(self):
+  def testAllWrongAllWeightsMissing(self):
     logits = tf.constant([[10.0, 0.0, 0.0],
                           [0.0, 10.0, 0.0],
                           [0.0, 0.0, 10.0]])
@@ -185,7 +185,7 @@ class SoftmaxCrossEntropyLossTest(tf.test.TestCase):
       loss = tf.contrib.losses.softmax_cross_entropy(logits, labels, weight)
       self.assertAlmostEqual(loss.eval(), 0.0, 3)
 
-  def testSomeMissing(self):
+  def testSomeWeightsMissing(self):
     logits = tf.constant([[10.0, 0.0, 0.0],
                           [0.0, 10.0, 0.0],
                           [0.0, 0.0, 10.0]])
@@ -235,6 +235,216 @@ class SoftmaxCrossEntropyLossTest(tf.test.TestCase):
       self.assertAlmostEqual(loss.eval(), expected_value, 3)
 
 
+class SparseSoftmaxCrossEntropyLossTest(tf.test.TestCase):
+  """Tests for tf.contrib.losses.sparse_softmax_cross_entropy."""
+
+  def testNoneWeightRaisesValueError(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[0], [1], [2]])
+    with self.test_session(), self.assertRaises(ValueError):
+      tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, weight=None)
+
+  def testAllCorrectInt32Labels(self):
+    with self.test_session():
+      logits = tf.constant([[10.0, 0.0, 0.0],
+                            [0.0, 10.0, 0.0],
+                            [0.0, 0.0, 10.0]])
+      labels = tf.constant([[0], [1], [2]], dtype=tf.int32)
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels)
+      self.assertEqual(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
+      self.assertAlmostEqual(loss.eval(), 0.0, 3)
+
+  def testAllCorrectInt64Labels(self):
+    with self.test_session():
+      logits = tf.constant([[10.0, 0.0, 0.0],
+                            [0.0, 10.0, 0.0],
+                            [0.0, 0.0, 10.0]])
+      labels = tf.constant([[0], [1], [2]], dtype=tf.int64)
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels)
+      self.assertEqual(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
+      self.assertAlmostEqual(loss.eval(), 0.0, 3)
+
+  def testAllCorrectNonColumnLabels(self):
+    with self.test_session():
+      logits = tf.constant([[10.0, 0.0, 0.0],
+                            [0.0, 10.0, 0.0],
+                            [0.0, 0.0, 10.0]])
+      labels = tf.constant([0, 1, 2])
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels)
+      self.assertEqual(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
+      self.assertAlmostEqual(loss.eval(), 0.0, 3)
+
+  def testAllWrongInt32Labels(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]], dtype=tf.int32)
+
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels)
+      self.assertEqual(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
+      self.assertAlmostEqual(loss.eval(), 10.0, 3)
+
+  def testAllWrongInt64Labels(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]], dtype=tf.int64)
+
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels)
+      self.assertEqual(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
+      self.assertAlmostEqual(loss.eval(), 10.0, 3)
+
+  def testAllWrongNonColumnLabels(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([2, 0, 1])
+
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels)
+      self.assertEqual(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
+      self.assertAlmostEqual(loss.eval(), 10.0, 3)
+
+  def testNonZeroLossWithPythonScalarWeight(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]])
+    weight = 2.3
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, weight)
+      self.assertAlmostEqual(loss.eval(), weight * 10.0, 3)
+
+  def testNonZeroLossWithScalarTensorWeight(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]])
+    weight = 2.3
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, tf.constant(weight))
+      self.assertAlmostEqual(loss.eval(), weight * 10.0, 3)
+
+  def testNonZeroLossWithOneDimBatchSpecificWeights(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]])
+    weight = tf.constant([1.2, 3.4, 5.6], shape=[3])
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, weight)
+      self.assertAlmostEqual(loss.eval(), (1.2 + 3.4 + 5.6) * 10.0 / 3.0, 3)
+
+  def testNonZeroLossWithColumnWeights(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]])
+    weight = tf.constant([[1.2], [3.4], [5.6]])
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, weight)
+      self.assertAlmostEqual(loss.eval(), (1.2 + 3.4 + 5.6) * 10.0 / 3.0, 3)
+
+  def testAllWrongAllWeightsMissing(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]])
+    weight = tf.constant([0, 0, 0], shape=[3])
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, weight)
+      self.assertAlmostEqual(loss.eval(), 0.0, 3)
+
+  def testSomeWeightsMissing(self):
+    logits = tf.constant([[10.0, 0.0, 0.0],
+                          [0.0, 10.0, 0.0],
+                          [0.0, 0.0, 10.0]])
+    labels = tf.constant([[2], [0], [1]])
+    weight = tf.constant([1.2, 0, 0], shape=[3])
+    with self.test_session():
+      loss = tf.contrib.losses.sparse_softmax_cross_entropy(
+          logits, labels, weight)
+      self.assertAlmostEqual(loss.eval(), 12.0, 3)
+
+  def testMeasurementSpecificWeightsRaisesException(self):
+    with self.test_session():
+      logits = tf.constant([[100.0, -100.0, -100.0],
+                            [-100.0, 100.0, -100.0],
+                            [-100.0, -100.0, 100.0]])
+      labels = tf.constant([[0], [1], [2]])
+      weight = tf.constant([[3, 4, 5],
+                            [2, 6, 0],
+                            [8, 0, 1]])
+
+      with self.assertRaises(ValueError):
+        tf.contrib.losses.sparse_softmax_cross_entropy(
+            logits, labels, weight=weight).eval()
+
+  def testInconsistentWeightSizeRaisesException(self):
+    """The weight tensor has incorrect number of elements."""
+    with self.test_session():
+      logits = tf.constant([[100.0, -100.0, -100.0],
+                            [-100.0, 100.0, -100.0],
+                            [-100.0, -100.0, 100.0]])
+      labels = tf.constant([[0], [1], [2]])
+      weight = tf.constant([1.2, 3.4, 5.6, 7.8])
+
+      with self.assertRaises(ValueError):
+        tf.contrib.losses.sparse_softmax_cross_entropy(
+            logits, labels, weight=weight).eval()
+
+  def testInconsistentLabelSizeRaisesException(self):
+    """The label tensor has incorrect number of elements."""
+    with self.test_session():
+      logits = tf.constant([[100.0, -100.0, -100.0],
+                            [-100.0, 100.0, -100.0],
+                            [-100.0, -100.0, 100.0]])
+      labels = tf.constant([[0], [1], [2], [3]])
+      weight = tf.constant([1.2, 3.4, 5.6])
+
+      with self.assertRaises(ValueError):
+        tf.contrib.losses.sparse_softmax_cross_entropy(
+            logits, labels, weight=weight).eval()
+
+  def testInconsistentWeightShapeRaisesException(self):
+    """The weight tensor has incorrect shape."""
+    with self.test_session():
+      logits = tf.constant([[100.0, -100.0, -100.0, -100.0],
+                            [-100.0, 100.0, -100.0, -100.0],
+                            [-100.0, -100.0, 100.0, -100.0],
+                            [-100.0, -100.0, -100.0, 100.0]])
+      labels = tf.constant([[0], [1], [2], [3]])
+      weight = tf.constant([[1.2, 3.4], [5.6, 7.8]])
+
+      with self.assertRaises(ValueError):
+        tf.contrib.losses.sparse_softmax_cross_entropy(
+            logits, labels, weight=weight).eval()
+
+  def testInconsistentLabelShapeRaisesException(self):
+    """The label tensor has incorrect shape."""
+    with self.test_session():
+      logits = tf.constant([[100.0, -100.0, -100.0, -100.0],
+                            [-100.0, 100.0, -100.0, -100.0],
+                            [-100.0, -100.0, 100.0, -100.0],
+                            [-100.0, -100.0, -100.0, 100.0]])
+      labels = tf.constant([[0, 1], [2, 3]])
+      weight = tf.constant([1.2, 3.4, 5.6, 7.8])
+
+      with self.assertRaises(tf.errors.InvalidArgumentError):
+        tf.contrib.losses.sparse_softmax_cross_entropy(
+            logits, labels, weight=weight).eval()
+
+
 class SigmoidCrossEntropyLossTest(tf.test.TestCase):
 
   def testAllCorrectSigmoid(self):
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 69cb84c2bb2..7f2218ada7a 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -419,6 +419,7 @@ $(wildcard tensorflow/core/graph/*.cc) \
 $(wildcard tensorflow/core/lib/*/*.cc) \
 $(wildcard tensorflow/core/platform/*.cc) \
 $(wildcard tensorflow/core/platform/*/*.cc) \
+$(wildcard tensorflow/core/platform/*/*/*.cc) \
 $(wildcard tensorflow/core/util/*.cc) \
 $(wildcard tensorflow/core/util/*/*.cc)
 CORE_CC_EXCLUDE_SRCS := \
diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt
index 1809a7a69ba..784fa84c63a 100644
--- a/tensorflow/contrib/makefile/proto_text_cc_files.txt
+++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt
@@ -25,7 +25,7 @@ tensorflow/core/lib/random/simple_philox.cc
 tensorflow/core/lib/random/random.cc
 tensorflow/core/lib/random/distribution_sampler.cc
 tensorflow/core/lib/io/zlib_outputbuffer.cc
-tensorflow/core/lib/io/zlib_inputbuffer.cc
+tensorflow/core/lib/io/zlib_inputstream.cc
 tensorflow/core/lib/io/two_level_iterator.cc
 tensorflow/core/lib/io/table_builder.cc
 tensorflow/core/lib/io/table.cc
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 4f5c3176dee..0478adab2a9 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -76,6 +76,7 @@ tensorflow/core/kernels/example_parsing_ops.cc
 tensorflow/core/kernels/dynamic_stitch_op.cc
 tensorflow/core/kernels/dynamic_partition_op.cc
 tensorflow/core/kernels/dense_update_ops.cc
+tensorflow/core/kernels/deep_conv2d.cc
 tensorflow/core/kernels/cwise_ops_common.cc
 tensorflow/core/kernels/cwise_op_tanh.cc
 tensorflow/core/kernels/cwise_op_sub.cc
@@ -100,6 +101,7 @@ tensorflow/core/kernels/cwise_op_div.cc
 tensorflow/core/kernels/cwise_op_add.cc
 tensorflow/core/kernels/ctc_decoder_ops.cc
 tensorflow/core/kernels/conv_ops_using_gemm.cc
+tensorflow/core/kernels/conv_ops_fused.cc
 tensorflow/core/kernels/conv_ops.cc
 tensorflow/core/kernels/conv_grad_ops.cc
 tensorflow/core/kernels/control_flow_ops.cc
diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py
index 4947ecaf409..4a0f2677afe 100644
--- a/tensorflow/contrib/metrics/__init__.py
+++ b/tensorflow/contrib/metrics/__init__.py
@@ -127,7 +127,6 @@ time.
 
 @@aggregate_metrics
 @@aggregate_metric_map
-@@run_metric
 
 ## Set `Ops`
 
@@ -147,7 +146,6 @@ from tensorflow.contrib.metrics.python.ops.confusion_matrix_ops import confusion
 from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histogram
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics
-from tensorflow.contrib.metrics.python.ops.metric_ops import run_metric
 from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_accuracy
 from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_auc
 from tensorflow.contrib.metrics.python.ops.metric_ops import streaming_mean
diff --git a/tensorflow/contrib/metrics/kernels/set_kernels.cc b/tensorflow/contrib/metrics/kernels/set_kernels.cc
index 9d3b3038a8c..9bfa6c93a31 100644
--- a/tensorflow/contrib/metrics/kernels/set_kernels.cc
+++ b/tensorflow/contrib/metrics/kernels/set_kernels.cc
@@ -213,7 +213,7 @@ void PopulateFromDenseGroup(OpKernelContext* ctx, const Tensor& input_tensor,
   result->clear();
   auto input_flat = input_tensor.flat<T>();
   const auto start = std::inner_product(
-      group_indices.begin(), group_indices.end(), input_strides.begin(), 0);
+      group_indices.begin(), group_indices.end(), input_strides.begin(), 0L);
   const TensorShape& input_shape = input_tensor.shape();
   const auto end = start + input_shape.dim_size(input_shape.dims() - 1);
   for (int64 i = start; i < end; ++i) {
@@ -273,7 +273,7 @@ void SetSizeOp<T>::Compute(OpKernelContext* ctx) {
 
     const auto group_key = group.group();
     const auto output_index = std::inner_product(
-        group_key.begin(), group_key.end(), output_strides.begin(), 0);
+        group_key.begin(), group_key.end(), output_strides.begin(), 0L);
     out(output_index) = group_set.size();
   }
 }
@@ -441,7 +441,7 @@ void SetOperationOp<T>::ComputeDenseToDense(OpKernelContext* ctx) const {
 
     std::set<T> group_set;
     ApplySetOperation(set1_group_set, set2_group_set, &group_set);
-    if (group_set.size() > 0) {
+    if (!group_set.empty()) {
       group_sets[group_indices] = group_set;
       const auto set_size = group_set.size();
       if (set_size > max_set_size) {
@@ -516,7 +516,7 @@ void SetOperationOp<T>::ComputeDenseToSparse(OpKernelContext* ctx) const {
 
     std::set<T> group_set;
     ApplySetOperation(set1_group_set, set2_group_set, &group_set);
-    if (group_set.size() > 0) {
+    if (!group_set.empty()) {
       group_sets[group_indices] = group_set;
       const auto set_size = group_set.size();
       if (set_size > max_set_size) {
@@ -632,7 +632,7 @@ void SetOperationOp<T>::ComputeSparseToSparse(OpKernelContext* ctx) const {
 
     std::set<T> group_set;
     ApplySetOperation(set1_group_set, set2_group_set, &group_set);
-    if (group_set.size() > 0) {
+    if (!group_set.empty()) {
       group_sets[*group_indices] = group_set;
       const auto set_size = group_set.size();
       if (set_size > max_set_size) {
diff --git a/tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py b/tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py
index 4797885fe0c..2e13cfe0d35 100644
--- a/tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py
+++ b/tensorflow/contrib/metrics/python/kernel_tests/confusion_matrix_ops_test.py
@@ -121,7 +121,7 @@ class ConfusionMatrixTest(tf.test.TestCase):
     predictions = np.asarray([1, 2, 3])
     labels = np.asarray([1, 2])
     self.assertRaisesRegexp(
-        ValueError, "are not compatible",
+        ValueError, "must be equal",
         tf.contrib.metrics.confusion_matrix, predictions, labels)
 
   def testOutputIsInt32(self):
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index efa56ec2226..f2974ebd2f6 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -22,8 +22,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import inspect
-
 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
 
 from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops
@@ -467,6 +465,8 @@ def streaming_accuracy(predictions, labels, weights=None,
   predictions, labels = metric_ops_util.remove_squeezable_dimensions(
       predictions, labels)
   predictions.get_shape().assert_is_compatible_with(labels.get_shape())
+  if labels.dtype != predictions.dtype:
+    predictions = math_ops.cast(predictions, labels.dtype)
   is_correct = math_ops.to_float(math_ops.equal(predictions, labels))
   return streaming_mean(is_correct, weights, metrics_collections,
                         updates_collections, name or 'accuracy')
@@ -2126,37 +2126,4 @@ def aggregate_metric_map(names_to_tuples):
   return dict(zip(metric_names, value_ops)), dict(zip(metric_names, update_ops))
 
 
-def run_metric(metric, predictions, targets, weights=None):
-  """Runs a single metric.
-
-  This function runs metric on given predictions and targets. weights will be
-  used if metric contains 'weights' in its argument.
-
-  Args:
-    metric: A function that evaluates targets given predictions.
-    predictions: A `Tensor` of arbitrary shape.
-    targets: A `Tensor` of the same shape as `predictions`.
-    weights: A set of weights that can be used in metric function to compute
-      weighted result.
-
-  Returns:
-    result: result returned by metric function.
-  """
-  metric_args = []
-  if hasattr(metric, '__code__'):
-    # Regular function.
-    metric_args = inspect.getargspec(metric).args
-  elif hasattr(metric, 'func') and hasattr(metric, 'keywords'):
-    # Partial function.
-    for arg in inspect.getargspec(metric.func).args:
-      if metric.keywords and arg not in metric.keywords.keys():
-        metric_args.append(arg)
-  if 'weights' in metric_args:
-    result = metric(predictions, targets, weights=weights)
-  else:
-    result = metric(predictions, targets)
-
-  return result
-
-
 __all__ = make_all(__name__)
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
index a8bdc0279e1..5a0d18b7d5d 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from functools import partial
 import math
 
 import numpy as np
@@ -2851,37 +2850,5 @@ class AggregateMetricMapTest(tf.test.TestCase):
       self.assertEqual(4, names_to_values['m2'].eval())
 
 
-class RunMetricTest(tf.test.TestCase):
-
-  def setUp(self):
-    tf.reset_default_graph()
-
-  def testRunMetric(self):
-    predictions = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32)
-    labels = tf.constant([1, 3, 2, 3], shape=(1, 4), dtype=tf.float32)
-    weights = tf.constant([0, 1, 0, 1], shape=(1, 4))
-
-    error, update_op = metrics.run_metric(metrics.streaming_mean_squared_error,
-                                          predictions, labels, weights)
-    with self.test_session() as sess:
-      sess.run(tf.initialize_local_variables())
-      self.assertEqual(13, sess.run(update_op))
-      self.assertEqual(13, error.eval())
-
-  def testRunMetricsWithOutWeights(self):
-    predictions = tf.constant([2, 4, 6], shape=(1, 3), dtype=tf.float32)
-    labels = tf.constant([1, 3, 2], shape=(1, 3), dtype=tf.float32)
-
-    streaming_mean_squared_error_no_weight = partial(
-        metrics.streaming_mean_squared_error, weights=None)
-
-    error, update_op = metrics.run_metric(
-        streaming_mean_squared_error_no_weight, predictions, labels)
-    with self.test_session() as sess:
-      sess.run(tf.initialize_local_variables())
-      self.assertEqual(6, sess.run(update_op))
-      self.assertEqual(6, error.eval())
-
-
 if __name__ == '__main__':
   tf.test.main()
diff --git a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
index 9c452fe39c7..945341d3258 100644
--- a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
+++ b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc
@@ -31,6 +31,7 @@ limitations under the License.
 
 #ifdef USE_HEXAGON_LIBS
 #include "tensorflow/core/platform/hexagon/gemm_wrapper.h"
+#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
 #endif
 
 namespace tensorflow {
@@ -49,6 +50,30 @@ class QuantizedMatMulOpForHexagonTest : public OpsTestBase {
   }
 };
 
+// Logs the cycle counts measured around two hexagon wrapper version queries.
+#ifdef USE_HEXAGON_LIBS
+TEST_F(QuantizedMatMulOpForHexagonTest, EvaluateSharedLibOverhead) {
+  const uint64 overhead_shared_lib_start =
+      profile_utils::CpuUtils::GetCurrentClockCycle();
+  const int wrapper_version = hexagon_gemm_wrapper_GetWrapperVersion();
+  const uint64 overhead_shared_lib_end =
+      profile_utils::CpuUtils::GetCurrentClockCycle();
+  const uint64 overhead_hexagon_rpc_start =
+      profile_utils::CpuUtils::GetCurrentClockCycle();
+  const int hexagon_binary_version =
+      hexagon_gemm_wrapper_GetHexagonBinaryVersion();
+  const uint64 overhead_hexagon_rpc_end =
+      profile_utils::CpuUtils::GetCurrentClockCycle();
+  LOG(INFO) << "Shared lib (ver = " << wrapper_version << ") overhead is "
+            << (overhead_shared_lib_end - overhead_shared_lib_start)
+            << " cycles";
+  LOG(INFO) << "hexagon rpc (ver = " << hexagon_binary_version
+            << ") overhead is "
+            << (overhead_hexagon_rpc_end - overhead_hexagon_rpc_start)
+            << " cycles";
+}
+#endif
+
 // Runs two small matrices through the operator, and leaves all the parameters
 // at their default values.
 // This test is a sample to execute matmul on hexagon.
diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index 92489033c85..39a8a9e8ca5 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -28,6 +28,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/rnn_cell_test.py"],
     additional_deps = [
         ":rnn_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -39,6 +40,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/lstm_ops_test.py"],
     additional_deps = [
         ":rnn_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -82,6 +84,7 @@ cuda_py_tests(
     srcs = ["python/kernel_tests/gru_ops_test.py"],
     additional_deps = [
         ":rnn_py",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD
index a44143ba406..79901b6ee56 100644
--- a/tensorflow/contrib/training/BUILD
+++ b/tensorflow/contrib/training/BUILD
@@ -11,6 +11,7 @@ py_library(
     name = "training_py",
     srcs = [
         "__init__.py",
+        "python/training/bucket_ops.py",
         "python/training/sampling_ops.py",
         "python/training/sequence_queueing_state_saver.py",
     ],
@@ -67,6 +68,18 @@ py_test(
     ],
 )
 
+py_test(
+    name = "bucket_ops_test",
+    size = "medium",
+    srcs = ["python/training/bucket_ops_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":training_py",
+        "//tensorflow:tensorflow_py",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py
index d8cd9058008..3c0ff0f8cfa 100644
--- a/tensorflow/contrib/training/__init__.py
+++ b/tensorflow/contrib/training/__init__.py
@@ -38,6 +38,17 @@ balanced.
 
 @@stratified_sample
 @@stratified_sample_unknown_dist
+
+## Bucketing
+
+Use [`bucket`](#bucket) or
+[`bucket_by_sequence_length`](#bucket_by_sequence_length) to stratify
+minibatches into groups ("buckets").  Use `bucket_by_sequence_length`
+with the argument `dynamic_pad=True` to receive minibatches of similarly
+sized sequences for efficient training via `dynamic_rnn`.
+
+@@bucket
+@@bucket_by_sequence_length
 """
 
 from __future__ import absolute_import
@@ -45,6 +56,7 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=unused-import,wildcard-import
+from tensorflow.contrib.training.python.training.bucket_ops import *
 from tensorflow.contrib.training.python.training.sampling_ops import *
 from tensorflow.contrib.training.python.training.sequence_queueing_state_saver import *
 from tensorflow.python.util.all_util import make_all
diff --git a/tensorflow/contrib/training/python/training/bucket_ops.py b/tensorflow/contrib/training/python/training/bucket_ops.py
new file mode 100644
index 00000000000..3a28c9141fa
--- /dev/null
+++ b/tensorflow/contrib/training/python/training/bucket_ops.py
@@ -0,0 +1,374 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Operations for bucketing data into groups.
+
+The classes and functions in this module are used to queue up data into
+buckets conditional on side information (e.g. sequence length).
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+
+import numpy as np
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import logging_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.training import input as input_py
+from tensorflow.python.training import queue_runner
+
+
+# pylint: disable=protected-access
+_as_original_type = input_py._as_original_type
+_as_tensor_list = input_py._as_tensor_list
+_deserialize_sparse_tensors = input_py._deserialize_sparse_tensors
+_dtypes = input_py._dtypes
+_serialize_sparse_tensors = input_py._serialize_sparse_tensors
+_shapes = input_py._shapes
+_which_queue = input_py._which_queue
+# pylint: enable=protected-access
+
+
+def _validate_bucket(tensor_list):
+  converted = ops.convert_n_to_tensor_or_indexed_slices(tensor_list)
+  if converted:
+    return converted
+  raise ValueError("Expected at least one tensor in bucket().")
+
+
+def bucket(tensors,
+           which_bucket,
+           batch_size,
+           num_buckets,
+           num_threads=1,
+           capacity=32,
+           shapes=None,
+           dynamic_pad=False,
+           allow_smaller_final_batch=False,
+           keep_input=None,
+           shared_name=None,
+           name=None):
+  """Lazy bucketing of input tensors according to `which_bucket`.
+
+  The argument `tensors` can be a list or a dictionary of tensors.
+  The value returned by the function will be of the same type
+  as `tensors`.
+
+  The tensors entering this function are put into the bucket given by
+  `which_bucket`.  Each bucket has its own queue.  When a bucket contains
+  `batch_size` elements, this minibatch is pushed onto a top queue.  The
+  tensors returned from this function are the result of dequeueing the
+  next minibatch from this top queue.
+
+  This function is implemented using several queues. A `QueueRunner` for the
+  queues is added to the current `Graph`'s `QUEUE_RUNNER` collection.
+
+  As the returned tensors are the result of a dequeue operation, evaluating
+  them will throw a `tf.errors.OutOfRangeError` when the input queue is
+  exhausted.  If these tensors are feeding another input queue, its queue runner
+  will catch this exception, however, if they are used in your main thread
+  you are responsible for catching this yourself.
+
+  *N.B.:* If `dynamic_pad` is `False`, you must ensure that either
+  (i) the `shapes` argument is passed, or (ii) all of the tensors in
+  `tensors` must have fully-defined shapes. `ValueError` will be
+  raised if neither of these conditions holds.
+
+  If `dynamic_pad` is `True`, it is sufficient that the *rank* of the
+  tensors is known, but individual dimensions may have shape `None`.
+  In this case, for each enqueue the dimensions with value `None`
+  may have a variable length; upon dequeue, the output tensors will be padded
+  on the right to the maximum shape of the tensors in the current minibatch.
+  For numbers, this padding takes value 0.  For strings, this padding is
+  the empty string.  See `PaddingFIFOQueue` for more info.
+
+  If `allow_smaller_final_batch` is `True`, a smaller batch value than
+  `batch_size` is returned when the queues are closed and there are not enough
+  elements to fill the batch, otherwise the pending elements are discarded.
+  In addition, all output tensors' static shapes, as accessed via the
+  `get_shape()` method will have a 0th `Dimension` value of `None`, and
+  operations that depend on fixed batch_size would fail.
+
+  Args:
+    tensors: The list or dictionary of tensors, representing a single element,
+      to bucket.  Nested lists are not supported.
+    which_bucket: An `int32` scalar Tensor taking a value in `[0, num_buckets)`.
+    batch_size: The new batch size pulled from the queue
+      (python int or int32 scalar).
+    num_buckets: A python integer, the number of buckets.
+    num_threads: An integer.  The number of threads enqueuing `tensors`.
+    capacity: An integer. The maximum number of minibatches in the top queue,
+      and also the maximum number of elements within each bucket.
+    shapes: (Optional) The shapes for each example.  Defaults to the
+      inferred shapes for `tensors`.
+    dynamic_pad: Boolean.  Allow variable dimensions in input shapes.
+      The given dimensions are padded upon dequeue so that tensors within a
+      batch have the same shapes.
+    allow_smaller_final_batch: (Optional) Boolean. If `True`, allow the final
+      batches to be smaller if there are insufficient items left in the queues.
+    keep_input: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
+      controls whether the input is added to the queue or not.  If it evaluates
+      `True`, then `tensors` are added to the bucket; otherwise they are
+      dropped.  This tensor essentially acts as a filtering mechanism.
+      The default behavior is to assume `keep_input=True`.
+    shared_name: (Optional). If set, the queues will be shared under the given
+      name across multiple sessions.
+    name: (Optional) A name for the operations.
+
+  Returns:
+    A tuple `(bucket, outputs)` where `bucket` is
+    a `int32` scalar tensor and `outputs` is a list or
+    dictionary of batched outputs corresponding to elements of `tensors`.
+    Every step will receive a new bucket of outputs.
+
+  Raises:
+    ValueError: If the `shapes` are not specified, and cannot be
+      inferred from the elements of `tensors`.
+  """
+  tensor_list = _as_tensor_list(tensors)
+  with ops.name_scope(name, "bucket", tensor_list) as name:
+    tensor_list = _validate_bucket(tensor_list)
+    (tensor_list, sparse_info) = _serialize_sparse_tensors(
+        tensor_list, enqueue_many=False)
+
+    # Round-trip batch_size to a tensor, and possibly back
+    batch_size = ops.convert_to_tensor(
+        batch_size, dtype=dtypes.int32, name="batch_size")
+    static_batch_size = tensor_util.constant_value(batch_size)
+    batch_size = (
+        static_batch_size if static_batch_size is not None else batch_size)
+
+    types = _dtypes([tensor_list])
+    shapes = _shapes([tensor_list], shapes, enqueue_many=False)
+
+    which_bucket = ops.convert_to_tensor(
+        which_bucket, dtype=dtypes.int32, name="which_bucket")
+
+    queue_creator = _which_queue(dynamic_pad)
+    bucket_queues = []
+    for i in range(num_buckets):
+      shared_name_i = (
+          "%s_%d" % (shared_name, i) if shared_name is not None else None)
+      bucket_queues.append(
+          queue_creator(capacity=capacity,
+                        dtypes=types,
+                        shapes=shapes,
+                        shared_name=shared_name_i, name="bucket_queue_%d" % i))
+
+    maybe_static_batch_size = (
+        None if allow_smaller_final_batch else static_batch_size)
+
+    bucket_shapes = [tensor_shape.vector(maybe_static_batch_size).concatenate(s)
+                     for s in bucket_queues[0].shapes]
+    # top_queue is a PaddingFIFOQueue even if the bucket queues are regular FIFO
+    # queues because if we use allow_smaller_final_batch, shapes will
+    # contain Nones in their first entry; as a result, a regular
+    # FIFOQueue would die when being passed shapes that are not fully defined.
+    top_queue = data_flow_ops.PaddingFIFOQueue(
+        capacity=capacity,
+        dtypes=[dtypes.int32] + types,
+        shapes=[tensor_shape.scalar()] + bucket_shapes,
+        shared_name=shared_name, name="top_queue")
+
+    def enqueue_which():
+      def enqueue_single(i):
+        return bucket_queues[i].enqueue(tensor_list)
+      enqueues = [
+          control_flow_ops.cond(
+              math_ops.equal(which_bucket, i),
+              functools.partial(enqueue_single, i),
+              control_flow_ops.no_op)
+          for i in range(num_buckets)]
+      return control_flow_ops.group(*enqueues, name="group_enqueues")
+
+    if keep_input is not None:
+      # TODO(ebrevdo): Expand keep_input param to core training
+      # methods, and pipe through to _serialize_sparse_tensors; so
+      # that expensive serialization is guarded by keep_input.
+      maybe_enqueue = control_flow_ops.cond(
+          keep_input,
+          enqueue_which,
+          control_flow_ops.no_op)
+    else:
+      maybe_enqueue = enqueue_which()
+
+    bucket_enqueue_ops = [maybe_enqueue] * num_threads
+
+    def read_bucket(i, q):
+      # Dequeue ops may return a bare Tensor for one component; force a list.
+      dequeue = q.dequeue_up_to if allow_smaller_final_batch else q.dequeue_many
+      batch = dequeue(batch_size, name="read_bucket_%d" % i)
+      return list(batch) if isinstance(batch, (list, tuple)) else [batch]
+
+    enqueues_to_top = [
+        top_queue.enqueue(
+            [constant_op.constant(i)] + read_bucket(i, q),
+            name="enqueue_from_bucket_%d" % i)
+        for i, q in enumerate(bucket_queues)]
+
+    for i, q in enumerate(bucket_queues):
+      queue_runner.add_queue_runner(queue_runner.QueueRunner(
+          q, [enqueues_to_top[i]],
+          queue_closed_exception_types=(
+              errors.OutOfRangeError, errors.CancelledError)))
+    queue_runner.add_queue_runner(queue_runner.QueueRunner(
+        top_queue, bucket_enqueue_ops,
+        queue_closed_exception_types=(
+            errors.OutOfRangeError, errors.CancelledError)))
+
+    for q in bucket_queues:
+      # Report each bucket queue's own fill level, not the top queue's.
+      logging_ops.scalar_summary(
+          "bucket/%s/size" % q.name, math_ops.cast(q.size(), dtypes.float32))
+    logging_ops.scalar_summary(
+        "bucket/%s/fraction_of_%d_full" % (top_queue.name, capacity),
+        math_ops.cast(top_queue.size(), dtypes.float32) * (1. / capacity))
+
+    dequeued = top_queue.dequeue(name="dequeue_top")
+    which_bucket_dequeued = dequeued[0]
+    dequeued = dequeued[1:]
+    dequeued = _deserialize_sparse_tensors(dequeued, sparse_info)
+    return (which_bucket_dequeued, _as_original_type(tensors, dequeued))
+
+
+def bucket_by_sequence_length(input_length,
+                              tensors,
+                              batch_size,
+                              bucket_boundaries,
+                              num_threads=1,
+                              capacity=32,
+                              shapes=None,
+                              dynamic_pad=False,
+                              allow_smaller_final_batch=False,
+                              keep_input=None,
+                              shared_name=None,
+                              name=None):
+  """Lazy bucketing of inputs according to their length.
+
+  This method calls `tf.contrib.training.bucket` under the hood, after first
+  subdividing the bucket boundaries into separate buckets and identifying which
+  bucket the given `input_length` belongs to.  See the documentation for
+  `bucket` for details of the other arguments.
+
+  Args:
+    input_length: `int32` scalar `Tensor`, the sequence length of tensors.
+    tensors: The list or dictionary of tensors, representing a single element,
+      to bucket.  Nested lists are not supported.
+    batch_size: The new batch size pulled from the queue
+      (python int or int32 scalar).
+    bucket_boundaries: int list, increasing non-negative numbers.
+      The edges of the buckets to use when bucketing tensors.  Two extra buckets
+      are created, one for `input_length < bucket_boundaries[0]` and
+      one for `input_length >= bucket_boundaries[-1]`.
+    num_threads: An integer.  The number of threads enqueuing `tensors`.
+    capacity: An integer. The maximum number of minibatches in the top queue,
+      and also the maximum number of elements within each bucket.
+    shapes: (Optional) The shapes for each example.  Defaults to the
+      inferred shapes for `tensors`.
+    dynamic_pad: Boolean.  Allow variable dimensions in input shapes.
+      The given dimensions are padded upon dequeue so that tensors within a
+      batch have the same shapes.
+    allow_smaller_final_batch: (Optional) Boolean. If `True`, allow the final
+      batches to be smaller if there are insufficient items left in the queues.
+    keep_input: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
+      controls whether the input is added to the queue or not.  If it evaluates
+      `True`, then `tensors` are added to the bucket; otherwise they are
+      dropped.  This tensor essentially acts as a filtering mechanism.
+      The default behavior is to assume `keep_input=True`.
+    shared_name: (Optional). If set, the queues will be shared under the given
+      name across multiple sessions.
+    name: (Optional) A name for the operations.
+
+  Returns:
+    A tuple `(sequence_length, outputs)` where `sequence_length` is
+    a 1-D `Tensor` of size `batch_size` and `outputs` is a list or dictionary
+    of batched, bucketed, outputs corresponding to elements of `tensors`.
+
+  Raises:
+    TypeError: if `bucket_boundaries` is not a list of python integers.
+    ValueError: if `bucket_boundaries` is empty or contains non-increasing
+      values.
+  """
+  tensor_list = _as_tensor_list(tensors)
+  if not isinstance(bucket_boundaries, (list, tuple)):
+    raise TypeError(
+        "bucket_boundaries must be a list or tuple, but received: %s"
+        % bucket_boundaries)
+  if not bucket_boundaries:
+    raise ValueError("bucket_boundaries must not be empty")
+  for (s, e) in zip(bucket_boundaries[:-1], bucket_boundaries[1:]):
+    if not isinstance(s, int) or not isinstance(e, int):
+      raise TypeError(
+          "bucket boundaries must be integers, but saw: %s and %s" % (s, e))
+    if s >= e:
+      raise ValueError(
+          "Buckets must contain sequential increasing lengths, but saw: "
+          "%d before %d" % (s, e))
+
+  with ops.name_scope(name, "bucket_by_sequence_length",
+                      [input_length] + tensor_list) as name:
+    input_length = ops.convert_to_tensor(
+        input_length, dtype=dtypes.int32, name="input_length")
+    # Bucketing conditions are:
+    #   l < b[0]
+    #   b[0] <= l < b[1]
+    #   b[1] <= l < b[2]
+    #   ...
+    #   b[N-2] <= l < b[N-1]
+    #   b[N-1] <= l
+    # Equivalent to:
+    #   [-inf, b[0], b[1], ..., b[N-1]] <= l < [b[0], b[1], ..., b[N-1], inf]
+    buckets_min = [np.iinfo(np.int32).min] + list(bucket_boundaries)
+    buckets_max = list(bucket_boundaries) + [np.iinfo(np.int32).max]
+    conditions_c = math_ops.logical_and(
+        math_ops.less_equal(buckets_min, input_length),
+        math_ops.less(input_length, buckets_max))
+    which_bucket = math_ops.reduce_min(array_ops.where(conditions_c))
+    which_bucket = math_ops.to_int32(which_bucket)
+
+    if shapes is not None:
+      shapes = [tensor_shape.scalar()] + list(shapes)
+
+    _, dequeued = bucket(
+        tensors=[input_length] + tensor_list,
+        which_bucket=which_bucket,
+        batch_size=batch_size,
+        num_buckets=len(bucket_boundaries) + 1,
+        num_threads=num_threads,
+        capacity=capacity,
+        shapes=shapes,
+        dynamic_pad=dynamic_pad,
+        allow_smaller_final_batch=allow_smaller_final_batch,
+        keep_input=keep_input,
+        shared_name=shared_name)
+
+    return (dequeued[0], _as_original_type(tensors, dequeued[1:]))
+
+
+__all__ = [
+    "bucket",
+    "bucket_by_sequence_length"
+]
diff --git a/tensorflow/contrib/training/python/training/bucket_ops_test.py b/tensorflow/contrib/training/python/training/bucket_ops_test.py
new file mode 100644
index 00000000000..587cf9411ce
--- /dev/null
+++ b/tensorflow/contrib/training/python/training/bucket_ops_test.py
@@ -0,0 +1,356 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tf.contrib.training.bucket."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+
+import numpy as np
+import tensorflow as tf
+
+
+def _which_bucket(bucket_edges, v):
+  """Identify which bucket v falls into.
+
+  Args:
+    bucket_edges: int array, bucket edges
+    v: int scalar, index
+  Returns:
+    int scalar, the bucket.
+    If 0 <= v < bucket_edges[0], return 0.
+    If bucket_edges[0] <= v < bucket_edges[1], return 1.
+    ...
+    If bucket_edges[-2] <= v < bucket_edges[-1], return len(bucket_edges) - 1.
+    If v >= bucket_edges[-1] (or v < 0), return len(bucket_edges) + 1.
+  """
+  v = np.asarray(v)
+  full = [0] + bucket_edges
+  found = np.where(np.logical_and(v >= full[:-1], v < full[1:]))[0]
+  if not found.size:
+    return len(full)
+  return found[0]
+
+
+class BucketTest(tf.test.TestCase):
+
+  def setUp(self):
+    tf.reset_default_graph()
+
+    self.scalar_int_feed = tf.placeholder(tf.int32, ())
+    self.unk_int64_feed = tf.placeholder(tf.int64, (None,))
+    self.vec3_str_feed = tf.placeholder(tf.string, (3,))
+
+    self._coord = tf.train.Coordinator()
+    # Make capacity very large so we can feed all the inputs in the
+    # main thread without blocking
+    input_queue = tf.PaddingFIFOQueue(
+        5000,
+        dtypes=[tf.int32, tf.int64, tf.string],
+        shapes=[(), (None,), (3,)])
+
+    self._input_enqueue_op = input_queue.enqueue(
+        (self.scalar_int_feed, self.unk_int64_feed, self.vec3_str_feed))
+    self.scalar_int, self.unk_int64, self.vec3_str = input_queue.dequeue()
+    self._threads = None
+    self._close_op = input_queue.close()
+    self._sess = None
+
+  def enqueue_inputs(self, sess, feed_dict):
+    sess.run(self._input_enqueue_op, feed_dict=feed_dict)
+
+  def start_queue_runners(self, sess):
+    # Store session to be able to close inputs later
+    if self._sess is None:
+      self._sess = sess
+    self._threads = tf.train.start_queue_runners(coord=self._coord)
+
+  def tearDown(self):
+    if self._sess is not None:
+      self._sess.run(self._close_op)
+    self._coord.request_stop()
+    self._coord.join(self._threads)
+
+  def testSingleBucket(self):
+    bucketed_dynamic = tf.contrib.training.bucket(
+        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
+        which_bucket=tf.constant(0),
+        num_buckets=2,
+        batch_size=32,
+        num_threads=10,
+        dynamic_pad=True)
+    # Check shape inference on bucketing outputs
+    self.assertAllEqual(
+        [[32], [32, None], [32, 3]],
+        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
+    with self.test_session() as sess:
+      for v in range(32):
+        self.enqueue_inputs(
+            sess,
+            {self.scalar_int_feed: v,
+             self.unk_int64_feed: v * [v],
+             self.vec3_str_feed: 3 * [str(v)]})
+      self.start_queue_runners(sess)
+
+      # Get a single minibatch
+      bucketed_values = sess.run(bucketed_dynamic)
+
+      # (which_bucket, bucket_tensors).
+      self.assertEqual(2, len(bucketed_values))
+
+      # Count number of bucket_tensors.
+      self.assertEqual(3, len(bucketed_values[1]))
+
+      # Ensure bucket 0 was used for all minibatch entries.
+      self.assertAllEqual(0, bucketed_values[0])
+
+      expected_scalar_int = np.arange(32)
+      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
+      for i in range(32):
+        expected_unk_int64[i, :i] = i
+      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
+
+      # Must resort the output because num_threads > 1 leads to
+      # sometimes-inconsistent insertion order.
+      resort = np.argsort(bucketed_values[1][0])
+      self.assertAllEqual(expected_scalar_int, bucketed_values[1][0][resort])
+      self.assertAllEqual(expected_unk_int64, bucketed_values[1][1][resort])
+      self.assertAllEqual(expected_vec3_str, bucketed_values[1][2][resort])
+
+  def testEvenOddBuckets(self):
+    which_bucket = (self.scalar_int % 2)
+    bucketed_dynamic = tf.contrib.training.bucket(
+        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
+        which_bucket=which_bucket,
+        num_buckets=2,
+        batch_size=32,
+        num_threads=10,
+        dynamic_pad=True)
+    # Check shape inference on bucketing outputs
+    self.assertAllEqual(
+        [[32], [32, None], [32, 3]],
+        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
+    with self.test_session() as sess:
+      for v in range(64):
+        self.enqueue_inputs(
+            sess,
+            {self.scalar_int_feed: v,
+             self.unk_int64_feed: v * [v],
+             self.vec3_str_feed: 3 * [str(v)]})
+      self.start_queue_runners(sess)
+
+      # Get two minibatches (one containing even values, one containing odds)
+      bucketed_values_0 = sess.run(bucketed_dynamic)
+      bucketed_values_1 = sess.run(bucketed_dynamic)
+
+      # (which_bucket, bucket_tensors).
+      self.assertEqual(2, len(bucketed_values_0))
+      self.assertEqual(2, len(bucketed_values_1))
+
+      # Count number of bucket_tensors.
+      self.assertEqual(3, len(bucketed_values_0[1]))
+      self.assertEqual(3, len(bucketed_values_1[1]))
+
+      # Figure out which output has the even values (there's
+      # randomness due to the multithreaded nature of bucketing)
+      if bucketed_values_0[0] % 2 == 1:
+        bucketed_values_even, bucketed_values_odd = (
+            bucketed_values_1, bucketed_values_0)
+      else:
+        bucketed_values_even, bucketed_values_odd = (
+            bucketed_values_0, bucketed_values_1)
+
+      # Ensure evens all came from bucket 0 and odds all from bucket 1.
+      self.assertAllEqual(0, bucketed_values_even[0])
+      self.assertAllEqual(1, bucketed_values_odd[0])
+
+      # Test the even bucket's output: the even values starting at 0
+      expected_scalar_int = np.arange(0, 32 * 2, 2)
+      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
+      for i in range(0, 32):
+        expected_unk_int64[i, :2*i] = 2*i
+      expected_vec3_str = np.vstack(
+          3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
+
+      # Must resort the output because num_threads > 1 leads to
+      # sometimes-inconsistent insertion order.
+      resort = np.argsort(bucketed_values_even[1][0])
+      self.assertAllEqual(expected_scalar_int,
+                          bucketed_values_even[1][0][resort])
+      self.assertAllEqual(expected_unk_int64,
+                          bucketed_values_even[1][1][resort])
+      self.assertAllEqual(expected_vec3_str,
+                          bucketed_values_even[1][2][resort])
+
+      # Test the odd bucket's output: the odd values starting at 1
+      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2)
+      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
+      for i in range(0, 32):
+        expected_unk_int64[i, :2*i + 1] = 2*i + 1
+      expected_vec3_str = np.vstack(
+          3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
+
+      # Must resort the output because num_threads > 1 leads to
+      # sometimes-inconsistent insertion order.
+      resort = np.argsort(bucketed_values_odd[1][0])
+      self.assertAllEqual(expected_scalar_int,
+                          bucketed_values_odd[1][0][resort])
+      self.assertAllEqual(expected_unk_int64,
+                          bucketed_values_odd[1][1][resort])
+      self.assertAllEqual(expected_vec3_str,
+                          bucketed_values_odd[1][2][resort])
+
+  def testEvenOddBucketsFilterOutAllOdd(self):
+    which_bucket = (self.scalar_int % 2)
+    keep_input = tf.equal(which_bucket, 0)
+    bucketed_dynamic = tf.contrib.training.bucket(
+        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
+        which_bucket=which_bucket,
+        num_buckets=2,
+        batch_size=32,
+        num_threads=10,
+        keep_input=keep_input,
+        dynamic_pad=True)
+    # Check shape inference on bucketing outputs
+    self.assertAllEqual(
+        [[32], [32, None], [32, 3]],
+        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
+    with self.test_session() as sess:
+      for v in range(128):
+        self.enqueue_inputs(
+            sess,
+            {self.scalar_int_feed: v,
+             self.unk_int64_feed: v * [v],
+             self.vec3_str_feed: 3 * [str(v)]})
+      self.start_queue_runners(sess)
+
+      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
+      bucketed_values_even0 = sess.run(bucketed_dynamic)
+      bucketed_values_even1 = sess.run(bucketed_dynamic)
+
+      # Ensure that bucket 1 was completely filtered out
+      self.assertAllEqual(0, bucketed_values_even0[0])
+      self.assertAllEqual(0, bucketed_values_even1[0])
+
+      # Merge their output for sorting and comparison
+      bucketed_values_all_elem0 = np.concatenate(
+          (bucketed_values_even0[1][0],
+           bucketed_values_even1[1][0]))
+
+      self.assertAllEqual(
+          np.arange(0, 128, 2), sorted(bucketed_values_all_elem0))
+
+
+class BucketBySequenceLengthTest(tf.test.TestCase):
+
+  def _testBucketBySequenceLength(self, allow_small_batch):
+    tf.reset_default_graph()
+
+    # All inputs must be identical lengths across tuple index.
+    # The input reader will get input_length from the first tuple
+    # entry.
+    data_len = 4
+    target_len = 3
+    input_pairs = [
+        (length,
+         ([np.int64(length)] * data_len,
+          [str(length).encode("ascii")] * target_len))
+        for length in (1, 3, 4, 5, 6, 10)]
+
+    lengths = tf.placeholder(tf.int32, ())
+    data = tf.placeholder(tf.int64, (data_len,))
+    targets = tf.placeholder(tf.string, (target_len,))
+
+    batch_size = 8
+    bucket_boundaries = [3, 4, 5, 10]
+
+    # Make capacity very large so we can feed all the inputs in the
+    # main thread without blocking
+    input_queue = tf.FIFOQueue(
+        5000, (tf.int32, tf.int64, tf.string),
+        ((), (data_len,), (target_len,)))
+    input_enqueue_op = input_queue.enqueue((lengths, data, targets))
+    lengths_t, data_t, targets_t = input_queue.dequeue()
+    close_input_op = input_queue.close()
+
+    (out_lengths_t, data_and_targets_t) = (
+        tf.contrib.training.bucket_by_sequence_length(
+            input_length=lengths_t,
+            tensors=[data_t, targets_t],
+            batch_size=batch_size,
+            bucket_boundaries=bucket_boundaries,
+            allow_smaller_final_batch=allow_small_batch,
+            num_threads=10))
+
+    expected_batch_size = None if allow_small_batch else batch_size
+    self.assertEqual(out_lengths_t.get_shape().as_list(),
+                     [expected_batch_size])
+    self.assertEqual(data_and_targets_t[0].get_shape().as_list(),
+                     [expected_batch_size, data_len])
+    self.assertEqual(data_and_targets_t[1].get_shape().as_list(),
+                     [expected_batch_size, target_len])
+
+    def _read_test(sess):
+      for _ in range(50):
+        (out_lengths, (data, targets)) = sess.run(
+            (out_lengths_t, data_and_targets_t))
+        if allow_small_batch:
+          self.assertEqual(data_len, data.shape[1])
+          self.assertEqual(target_len, targets.shape[1])
+          self.assertGreaterEqual(batch_size, out_lengths.shape[0])
+          self.assertGreaterEqual(batch_size, data.shape[0])
+          self.assertGreaterEqual(batch_size, targets.shape[0])
+        else:
+          self.assertEqual((batch_size, data_len), data.shape)
+          self.assertEqual((batch_size, target_len), targets.shape)
+          self.assertEqual((batch_size,), out_lengths.shape)
+        for (lr, dr, tr) in zip(out_lengths, data, targets):
+          # Make sure length matches data (here it's the same value)
+          self.assertEqual(dr[0], lr)
+          # Make sure data & targets match
+          self.assertEqual(dr[0], int(tr[0].decode("ascii")))
+          # Make sure for each row, data came from the same bucket.
+          self.assertEqual(_which_bucket(bucket_boundaries, dr[0]),
+                           _which_bucket(bucket_boundaries, dr[1]))
+
+    with self.test_session() as sess:
+      coord = tf.train.Coordinator()
+
+      # Feed the inputs, then close the input queue.
+      for _ in range(50 * batch_size + 100):
+        which = random.randint(0, len(input_pairs) - 1)
+        length, pair = input_pairs[which]
+        sess.run(input_enqueue_op, feed_dict={
+            lengths: length, data: pair[0], targets: pair[1]})
+      sess.run(close_input_op)
+
+      # Start the queue runners
+      threads = tf.train.start_queue_runners(coord=coord)
+      # Read off the top of the bucket and ensure correctness of output
+      _read_test(sess)
+      coord.request_stop()
+      coord.join(threads)
+
+  def testBucketBySequenceLength(self):
+    self._testBucketBySequenceLength(allow_small_batch=False)
+
+  def testBucketBySequenceLengthAllow(self):
+    self._testBucketBySequenceLength(allow_small_batch=True)
+
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 45d6b648bca..5a8b8c86e52 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -171,7 +171,6 @@ cc_library(
         "platform/env.h",
         "platform/file_system.h",
         "platform/fingerprint.h",
-        "platform/hexagon/profile_utils/cpu_utils.h",
         "platform/init_main.h",
         "platform/logging.h",
         "platform/macros.h",
@@ -179,6 +178,7 @@ cc_library(
         "platform/net.h",
         "platform/mutex.h",
         "platform/notification.h",
+        "platform/profile_utils/cpu_utils.h",
         "platform/protobuf.h",  # TODO(josh11b): make internal
         "platform/regexp.h",
         "platform/strong_hash.h",
@@ -862,8 +862,8 @@ cc_library(
             "lib/**/*.cc",
             "platform/*.h",
             "platform/*.cc",
-            "platform/hexagon/**/*.h",
-            "platform/hexagon/**/*.cc",
+            "platform/profile_utils/**/*.h",
+            "platform/profile_utils/**/*.cc",
         ] + tf_additional_lib_srcs(),
         exclude = [
             "**/*test*",
@@ -891,7 +891,7 @@ cc_library(
         "lib/io/snappy/snappy_inputbuffer.h",
         "lib/io/snappy/snappy_outputbuffer.h",
         "lib/io/zlib_compression_options.h",
-        "lib/io/zlib_inputbuffer.h",
+        "lib/io/zlib_inputstream.h",
         "lib/io/zlib_outputbuffer.h",
         "lib/jpeg/jpeg_handle.h",
         "lib/png/png_io.h",
@@ -1348,11 +1348,11 @@ tf_cc_tests(
         "lib/strings/stringprintf_test.cc",
         "lib/wav/wav_io_test.cc",
         "platform/fingerprint_test.cc",
-        "platform/hexagon/profile_utils/cpu_utils_test.cc",
         "platform/integral_types_test.cc",
         "platform/logging_test.cc",
         "platform/net_test.cc",
         "platform/port_test.cc",
+        "platform/profile_utils/cpu_utils_test.cc",
     ],
     deps = [
         ":lib",
diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc
index 70b01d6485a..f525d1d9812 100644
--- a/tensorflow/core/common_runtime/bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/bfc_allocator.cc
@@ -157,7 +157,7 @@ bool BFCAllocator::Extend(size_t rounded_bytes) {
   InsertFreeChunkIntoBin(h);
 
   // Invoke visitors on newly allocated region.
-  for (auto visitor : region_visitors_) {
+  for (const auto& visitor : region_visitors_) {
     visitor(mem_addr, bytes);
   }
   return true;
diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc
index 9bd162b72fd..6a49c940b3e 100644
--- a/tensorflow/core/common_runtime/constant_folding.cc
+++ b/tensorflow/core/common_runtime/constant_folding.cc
@@ -279,7 +279,7 @@ bool ReplaceTensorWithConstant(Graph* graph, Device* partition_device,
       edges_to_remove.push_back(out_edge);
     }
   }
-  string node_name = n->name();
+  const string& node_name = n->name();
   Node* constant_node;
   auto builder = NodeDefBuilder(strings::StrCat(graph->NewName(node_name),
                                                 "__cf__", UniqueConstantId()),
diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index 5dc8c33b2a7..e55ef7d5ba9 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/copy_tensor.h"
 
 #include <atomic>
+#include <utility>
 #include <vector>
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/platform/logging.h"
@@ -26,7 +27,9 @@ namespace {
 
 struct RegistrationInfo {
   RegistrationInfo(DeviceType s, DeviceType r, CopyTensor::CopyFunction cf)
-      : sender_device_type(s), receiver_device_type(r), copy_function(cf) {}
+      : sender_device_type(std::move(s)),
+        receiver_device_type(r),
+        copy_function(cf) {}
   DeviceType sender_device_type;
   DeviceType receiver_device_type;
   CopyTensor::CopyFunction copy_function;
diff --git a/tensorflow/core/common_runtime/device_set.cc b/tensorflow/core/common_runtime/device_set.cc
index 98c6c3843ce..8ff93760d49 100644
--- a/tensorflow/core/common_runtime/device_set.cc
+++ b/tensorflow/core/common_runtime/device_set.cc
@@ -71,9 +71,9 @@ std::vector<DeviceType> DeviceSet::PrioritizedDeviceTypeList() const {
   std::vector<DeviceType> result;
   std::set<string> seen;
   for (Device* d : devices_) {
-    auto t = d->device_type();
+    const auto& t = d->device_type();
     if (seen.insert(t).second) {
-      result.emplace_back(DeviceType(t));
+      result.emplace_back(t);
     }
   }
   std::sort(result.begin(), result.end(), DeviceTypeComparator);
diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 6aedcf4e7e8..4c90226231c 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -26,7 +26,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_tracer.h"
 #include "tensorflow/core/common_runtime/graph_optimizer.h"
 #include "tensorflow/core/common_runtime/memory_types.h"
-#include "tensorflow/core/common_runtime/session_factory.h"
 #include "tensorflow/core/common_runtime/simple_placer.h"
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/framework/function.h"
@@ -113,6 +112,77 @@ string GetRendezvousKey(const string& tensor_name,
 
 }  // namespace
 
+class DirectSessionFactory : public SessionFactory {
+ public:
+  DirectSessionFactory() {}
+
+  bool AcceptsOptions(const SessionOptions& options) override {
+    return options.target.empty();
+  }
+
+  Session* NewSession(const SessionOptions& options) override {
+    // Must do this before the CPU allocator is created.
+    if (options.config.graph_options().build_cost_model() > 0) {
+      EnableCPUAllocatorFullStats(true);
+    }
+    std::vector<Device*> devices;
+    Status s = DeviceFactory::AddDevices(
+        options, "/job:localhost/replica:0/task:0", &devices);
+    if (!s.ok()) {
+      LOG(ERROR) << s;
+      return nullptr;
+    }
+
+    DirectSession* session =
+        new DirectSession(options, new DeviceMgr(devices), this);
+    {
+      mutex_lock l(sessions_lock_);
+      sessions_.push_back(session);
+    }
+    return session;
+  }
+
+  Status Reset(const SessionOptions& options,
+               const std::vector<string>& containers) override {
+    std::vector<DirectSession*> sessions_to_reset;
+    {
+      mutex_lock l(sessions_lock_);
+      // Swap the list into a local so that sessions_lock_ is released before
+      // session->Close() calls DirectSessionFactory::Deregister(), which
+      // acquires sessions_lock_ itself and would otherwise deadlock.
+      std::swap(sessions_to_reset, sessions_);
+    }
+    Status s;
+    for (auto session : sessions_to_reset) {
+      s.Update(session->Reset(containers));
+    }
+    // TODO(suharshs): Change the Reset behavior of all SessionFactories so that
+    // it doesn't close the sessions?
+    for (auto session : sessions_to_reset) {
+      s.Update(session->Close());
+    }
+    return s;
+  }
+
+  void Deregister(const DirectSession* session) {
+    mutex_lock l(sessions_lock_);
+    sessions_.erase(std::remove(sessions_.begin(), sessions_.end(), session),
+                    sessions_.end());
+  }
+
+ private:
+  mutex sessions_lock_;
+  std::vector<DirectSession*> sessions_ GUARDED_BY(sessions_lock_);
+};
+
+class DirectSessionRegistrar {
+ public:
+  DirectSessionRegistrar() {
+    SessionFactory::Register("DIRECT_SESSION", new DirectSessionFactory());
+  }
+};
+static DirectSessionRegistrar registrar;
+
 std::atomic_int_fast64_t DirectSession::step_id_counter_(1);
 
 // NOTE: On Android with a single device, there is never
@@ -146,10 +216,13 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool,
 }
 
 DirectSession::DirectSession(const SessionOptions& options,
-                             const DeviceMgr* device_mgr)
+                             const DeviceMgr* device_mgr,
+                             DirectSessionFactory* const factory)
     : options_(options),
       device_mgr_(device_mgr),
+      factory_(factory),
       cancellation_manager_(new CancellationManager()),
+      closed_(false),
       operation_timeout_in_ms_(options_.config.operation_timeout_in_ms()) {
   if (options_.config.session_inter_op_thread_pool_size() > 0) {
     for (int i = 0; i < options_.config.session_inter_op_thread_pool_size();
@@ -194,6 +267,7 @@ DirectSession::DirectSession(const SessionOptions& options,
 }
 
 DirectSession::~DirectSession() {
+  if (!closed_) Close();
   for (auto& it : partial_runs_) {
     it.second.reset(nullptr);
   }
@@ -237,6 +311,7 @@ Status DirectSession::Create(const GraphDef& graph) {
 }
 
 Status DirectSession::Extend(const GraphDef& graph) {
+  TF_RETURN_IF_ERROR(CheckNotClosed());
   mutex_lock l(graph_def_lock_);
   return ExtendLocked(graph);
 }
@@ -267,6 +342,7 @@ Status DirectSession::Run(const RunOptions& run_options,
                           const std::vector<string>& target_nodes,
                           std::vector<Tensor>* outputs,
                           RunMetadata* run_metadata) {
+  TF_RETURN_IF_ERROR(CheckNotClosed());
   direct_session_runs->GetCell()->IncrementBy(1);
   {
     mutex_lock l(graph_def_lock_);
@@ -412,6 +488,7 @@ Status DirectSession::PRunSetup(const std::vector<string>& input_names,
                                 const std::vector<string>& output_names,
                                 const std::vector<string>& target_nodes,
                                 string* handle) {
+  TF_RETURN_IF_ERROR(CheckNotClosed());
   {
     mutex_lock l(graph_def_lock_);
     if (!graph_created_) {
@@ -487,6 +564,7 @@ Status DirectSession::PRunSetup(const std::vector<string>& input_names,
 Status DirectSession::PRun(const string& handle, const NamedTensorList& inputs,
                            const std::vector<string>& output_names,
                            std::vector<Tensor>* outputs) {
+  TF_RETURN_IF_ERROR(CheckNotClosed());
   std::vector<string> parts = str_util::Split(handle, ';');
   const string& key = parts[0];
   // Get the executors for this partial run.
@@ -1002,8 +1080,20 @@ Status DirectSession::CreateGraphs(
   return s;
 }
 
+::tensorflow::Status DirectSession::Reset(
+    const std::vector<string>& containers) {
+  device_mgr_->ClearContainers(containers);
+  return ::tensorflow::Status::OK();
+}
+
 ::tensorflow::Status DirectSession::Close() {
   cancellation_manager_->StartCancel();
+  {
+    mutex_lock l(mu_);
+    if (closed_) return ::tensorflow::Status::OK();
+    closed_ = true;
+  }
+  if (factory_ != nullptr) factory_->Deregister(this);
   return ::tensorflow::Status::OK();
 }
 
@@ -1051,37 +1141,4 @@ void DirectSession::WaitForNotification(RunState* run_state,
   }
 }
 
-class DirectSessionFactory : public SessionFactory {
- public:
-  DirectSessionFactory() {}
-
-  bool AcceptsOptions(const SessionOptions& options) override {
-    return options.target.empty();
-  }
-
-  Session* NewSession(const SessionOptions& options) override {
-    // Must do this before the CPU allocator is created.
-    if (options.config.graph_options().build_cost_model() > 0) {
-      EnableCPUAllocatorFullStats(true);
-    }
-    std::vector<Device*> devices;
-    Status s = DeviceFactory::AddDevices(
-        options, "/job:localhost/replica:0/task:0", &devices);
-    if (!s.ok()) {
-      LOG(ERROR) << s;
-      return nullptr;
-    }
-
-    return new DirectSession(options, new DeviceMgr(devices));
-  }
-};
-
-class DirectSessionRegistrar {
- public:
-  DirectSessionRegistrar() {
-    SessionFactory::Register("DIRECT_SESSION", new DirectSessionFactory());
-  }
-};
-static DirectSessionRegistrar registrar;
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h
index dcb2c584c82..8681d8fb7c4 100644
--- a/tensorflow/core/common_runtime/direct_session.h
+++ b/tensorflow/core/common_runtime/direct_session.h
@@ -28,6 +28,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/executor.h"
 #include "tensorflow/core/common_runtime/rendezvous_mgr.h"
+#include "tensorflow/core/common_runtime/session_factory.h"
 #include "tensorflow/core/common_runtime/simple_graph_execution_state.h"
 #include "tensorflow/core/debug/debug_graph_utils.h"
 #include "tensorflow/core/framework/cancellation.h"
@@ -47,11 +48,18 @@ namespace tensorflow {
 class CostModel;
 class DebugGateway;
 class Device;
+class DirectSessionFactory;
 
 class DirectSession : public Session {
  public:
+  typedef std::function<void(Session*)> CloseCallback;
+
   // Takes ownership of 'device_mgr'.
-  DirectSession(const SessionOptions& options, const DeviceMgr* device_mgr);
+  // 'factory' is not owned; the DirectSession deregisters itself from it when
+  // it is closed. This ensures that Reset requests from the 'factory' don't
+  // get sent to sessions that are already closed.
+  DirectSession(const SessionOptions& options, const DeviceMgr* device_mgr,
+                DirectSessionFactory* factory);
   ~DirectSession() override;
 
   typedef std::vector<std::pair<string, Tensor>> NamedTensorList;
@@ -83,6 +91,10 @@ class DirectSession : public Session {
                             const std::vector<string>& output_names,
                             std::vector<Tensor>* outputs) override;
 
+  // Reset clears 'containers' from the device_mgr of the DirectSession.
+  // If 'containers' is empty, then Reset clears the default container.
+  ::tensorflow::Status Reset(const std::vector<string>& containers);
+
   ::tensorflow::Status Close() override;
 
   void ExportCostModels(CostModelManager::CostModelMap* cost_models) {
@@ -198,6 +210,12 @@ class DirectSession : public Session {
   // operation_timeout_in_ms is greater than 0.
   void WaitForNotification(RunState* run_state, int64 timeout_in_ms);
 
+  ::tensorflow::Status CheckNotClosed() {
+    mutex_lock l(mu_);
+    if (closed_) return errors::Cancelled("Session has been closed.");
+    return ::tensorflow::Status::OK();
+  }
+
   const SessionOptions options_;
 
   // Device structures.
@@ -232,10 +250,12 @@ class DirectSession : public Session {
   // This holds all the tensors that are currently alive in the session.
   SessionState session_state_;
 
+  DirectSessionFactory* const factory_;  // not owned
   CancellationManager* cancellation_manager_;
 
   // Saves and restores device placements for stateful nodes.
   mutex mu_;
+
   // Map of placed stateful nodes, i.e. nodes for which is_stateful()
   // is true, such as "params" and "queue" nodes.  Once placed these
   // nodes can not be moved to a different device.  Maps node names to
@@ -251,6 +271,9 @@ class DirectSession : public Session {
   // library; it copies and modifies the function library.
   std::unique_ptr<FunctionLibraryDefinition> flib_def_;
 
+  // true if the Session has been Closed.
+  bool closed_ GUARDED_BY(mu_);
+
   // For generating unique names.
   int64 name_counter_ GUARDED_BY(mu_) = 0;
 
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc
index 380f2ca8fd6..4eb48c7bcf3 100644
--- a/tensorflow/core/common_runtime/direct_session_test.cc
+++ b/tensorflow/core/common_runtime/direct_session_test.cc
@@ -397,6 +397,14 @@ TEST(DirectSessionTest, MultipleFeedTest) {
   ASSERT_EQ(2, outputs.size());
   ASSERT_EQ(11.0, outputs[0].flat<float>()(0));
   ASSERT_EQ(22.0, outputs[1].flat<float>()(0));
+
+  // Feed [first_const, first_const]
+  s = session->Run(
+      {{first_const->name(), value_11}, {first_const->name(), value_22}},
+      {first_identity->name() + ":0", second_identity->name() + ":0"}, {},
+      &outputs);
+  EXPECT_TRUE(errors::IsInvalidArgument(s));
+  EXPECT_TRUE(StringPiece(s.error_message()).contains("fed more than once"));
 }
 
 REGISTER_OP("Darth")
@@ -970,5 +978,129 @@ TEST(DirectSessionTest, TestSessionInterOpThreadsInvalidOptions) {
   }
 }
 
+TEST(DirectSessionTest, TestDirectSessionRunClose) {
+  // Construct a graph with a variable and a single assign.
+  Graph g(OpRegistry::Global());
+  Tensor t(DT_FLOAT, TensorShape({}));
+  t.scalar<float>()() = {1.2};
+  Node* var_val = test::graph::Constant(&g, t);
+  Node* var = test::graph::Var(&g, DT_FLOAT, {});
+  Node* var_assign = test::graph::Assign(&g, var, var_val);
+  GraphDef def;
+  test::graph::ToGraphDef(&g, &def);
+
+  SessionOptions options;
+  (*options.config.mutable_device_count())["CPU"] = 2;
+  std::unique_ptr<Session> session(NewSession(options));
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def));
+
+  // Assign a value to the var.
+  TF_ASSERT_OK(session->Run({} /* inputs */, {},
+                            {var_assign->name()} /* target_nodes */, nullptr));
+
+  // Run a read on the variable to ensure that it works.
+  std::vector<Tensor> outputs;
+  TF_ASSERT_OK(session->Run(
+      {} /* inputs */, {var->name() + ":0"} /* output_names */, {}, &outputs));
+  EXPECT_EQ(t.scalar<float>()(), outputs[0].scalar<float>()());
+  outputs.clear();
+
+  // Close the session.
+  session->Close();
+
+  // Re-run the assign on the closed session; it must fail with Cancelled.
+  Status s = session->Run({} /* inputs */, {},
+                          {var_assign->name()} /* target_nodes */, nullptr);
+  EXPECT_EQ("Cancelled: Session has been closed.", s.ToString());
+}
+
+TEST(DirectSessionTest, TestDirectSessionPRunClose) {
+  GraphDef def;
+  Graph g(OpRegistry::Global());
+
+  Tensor first_value(DT_FLOAT, TensorShape({}));
+  first_value.scalar<float>()() = 1.0;
+  Node* first_const = test::graph::Constant(&g, first_value);
+  Node* first_identity = test::graph::Identity(&g, first_const);
+
+  Tensor second_value(DT_FLOAT, TensorShape({}));
+  second_value.scalar<float>()() = 2.0;
+  Node* second_const = test::graph::Constant(&g, second_value);
+  Node* second_identity = test::graph::Identity(&g, second_const);
+
+  Node* third = test::graph::Add(&g, first_identity, second_identity);
+  Node* third_identity = test::graph::Identity(&g, third);
+
+  test::graph::ToGraphDef(&g, &def);
+
+  std::unique_ptr<Session> session(CreateSession());
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def));
+
+  std::vector<Tensor> outputs;
+
+  string handle;
+  Status s = session->PRunSetup(
+      {first_const->name(), second_const->name()},
+      {first_identity->name() + ":0", second_identity->name() + ":0",
+       third_identity->name() + ":0"},
+      {}, &handle);
+  TF_ASSERT_OK(s);
+
+  Tensor value_11(DT_FLOAT, TensorShape({}));
+  value_11.scalar<float>()() = 11.0;
+  Tensor value_22(DT_FLOAT, TensorShape({}));  // NOTE(review): unused below
+  value_22.scalar<float>()() = 22.0;
+
+  // Close the session after the partial run has been set up.
+  session->Close();
+
+  // Feed first_const, fetch first_identity; PRun must now report Cancelled.
+  s = session->PRun(handle, {{first_const->name(), value_11}},
+                    {first_identity->name() + ":0"}, &outputs);
+  EXPECT_EQ("Cancelled: Session has been closed.", s.ToString());
+}
+
+TEST(DirectSessionTest, TestDirectSessionReset) {
+  // Construct a graph with a variable and a single assign.
+  Graph g(OpRegistry::Global());
+  Tensor t(DT_FLOAT, TensorShape({}));
+  t.scalar<float>()() = {1.2};
+  Node* var_val = test::graph::Constant(&g, t);
+  Node* var = test::graph::Var(&g, DT_FLOAT, {});
+  Node* var_assign = test::graph::Assign(&g, var, var_val);
+  GraphDef def;
+  test::graph::ToGraphDef(&g, &def);
+
+  SessionOptions options;
+  (*options.config.mutable_device_count())["CPU"] = 2;
+  std::unique_ptr<Session> session(NewSession(options));
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def));
+
+  // Assign a value to the var.
+  TF_ASSERT_OK(session->Run({} /* inputs */, {},
+                            {var_assign->name()} /* target_nodes */, nullptr));
+
+  // Run a read on the variable to ensure that it works.
+  std::vector<Tensor> outputs;
+  TF_ASSERT_OK(session->Run(
+      {} /* inputs */, {var->name() + ":0"} /* output_names */, {}, &outputs));
+  EXPECT_EQ(t.scalar<float>()(), outputs[0].scalar<float>()());
+  outputs.clear();
+
+  // Reset the containers.
+  Reset(options, {});
+
+  // Re-run the assign after Reset; it is expected to fail with Cancelled.
+  // TODO(suharshs): This test only works because we close the Session in Reset.
+  // If we change the behavior of Reset to not close the Session, this test will
+  // fail, since the Variable buffer is cached by var.
+  Status s = session->Run({} /* inputs */, {},
+                          {var_assign->name()} /* target_nodes */, nullptr);
+  EXPECT_EQ("Cancelled: Session has been closed.", s.ToString());
+}
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc
index 2f5507a0c55..e263e62bd84 100644
--- a/tensorflow/core/common_runtime/function_test.cc
+++ b/tensorflow/core/common_runtime/function_test.cc
@@ -144,7 +144,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test {
 
   void Init(const std::vector<FunctionDef>& flib) {
     FunctionDefLibrary proto;
-    for (auto fdef : flib) *(proto.add_function()) = fdef;
+    for (const auto& fdef : flib) *(proto.add_function()) = fdef;
     delete lib_def_;
     lib_def_ = new FunctionLibraryDefinition(OpRegistry::Global(), proto);
     delete lib_;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
index 7506e35ff34..f18ee5efd85 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
@@ -95,7 +95,7 @@ void EventMgr::ThenDeleteTensors(perftools::gputools::Stream* stream,
     FlushAccumulatedTensors();
   }
   accumulated_stream_ = stream;
-  for (auto t : tensors) {
+  for (const auto& t : tensors) {
     // accumulated_tensors_ takes over ownership of the reference to "t"
     accumulated_tensors_->push_back(t);
     accumulated_tensor_bytes_ += t.TotalBytes();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc
index 5b4812bb34a..3aaaf87e79c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc
@@ -129,7 +129,7 @@ TEST_F(GpuStreamUtilTest, StreamOverrides) {
   // Nodes should be assigned to streams by op type.
   for (const auto& it : node_to_stream_id) {
     Node* n = g.FindNodeId(it.first);
-    const string op = n->type_string();
+    const string& op = n->type_string();
     const int stream = it.second;
     if (op == "Const") {
       EXPECT_EQ(stream, 90);
diff --git a/tensorflow/core/common_runtime/gpu/pool_allocator.cc b/tensorflow/core/common_runtime/gpu/pool_allocator.cc
index b44108d1ace..e0362b38e6b 100644
--- a/tensorflow/core/common_runtime/gpu/pool_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/pool_allocator.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <sys/mman.h>  // for munmap
 
 #include <map>
+#include <utility>
 
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/logging.h"
@@ -31,7 +32,7 @@ namespace tensorflow {
 PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
                              SubAllocator* allocator,
                              RoundUpInterface* size_rounder, string name)
-    : name_(name),
+    : name_(std::move(name)),
       has_size_limit_(pool_size_limit > 0),
       auto_resize_(auto_resize),
       pool_size_limit_(pool_size_limit),
@@ -125,7 +126,7 @@ void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
     return PrepareChunk(r, alignment, num_bytes);
   } else {
     void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes);
-    for (auto v : alloc_visitors_) {
+    for (const auto& v : alloc_visitors_) {
       v(ptr, num_bytes);
     }
     return PrepareChunk(ptr, alignment, num_bytes);
@@ -137,7 +138,7 @@ void PoolAllocator::DeallocateRaw(void* ptr) {
   ChunkPrefix* cp = FindPrefix(ptr);
   CHECK_LE((void*)cp, (void*)ptr);
   if (!has_size_limit_ && !auto_resize_) {
-    for (auto v : free_visitors_) {
+    for (const auto& v : free_visitors_) {
       v(cp, cp->num_bytes);
     }
     allocator_->Free(cp, cp->num_bytes);
@@ -160,7 +161,7 @@ void PoolAllocator::Clear() {
     mutex_lock lock(mutex_);
     for (auto iter : pool_) {
       PtrRecord* pr = iter.second;
-      for (auto v : free_visitors_) {
+      for (const auto& v : free_visitors_) {
         v(pr->ptr, pr->num_bytes);
       }
       allocator_->Free(pr->ptr, pr->num_bytes);
@@ -217,7 +218,7 @@ void PoolAllocator::EvictOne() {
     DCHECK(iter != pool_.end());
   }
   pool_.erase(iter);
-  for (auto v : free_visitors_) {
+  for (const auto& v : free_visitors_) {
     v(prec->ptr, prec->num_bytes);
   }
   allocator_->Free(prec->ptr, prec->num_bytes);
diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc
index f85b37cb8f4..60da115988e 100644
--- a/tensorflow/core/common_runtime/gpu/process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/process_state.cc
@@ -181,12 +181,25 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) {
   // different numa_nodes.  For now, just one.
   numa_node = 0;
   mutex_lock lock(mu_);
+
+  // Find the first valid StreamExecutor to request CUDA host memory
+  // through, since any will work.
+  //
+  // This search isn't super clean, and it would be nice to use a
+  // better source of information about which executor to use.  For
+  // example, process_state could maybe save the first stream executor
+  // it knows is valid.
+  gpu::StreamExecutor* se = nullptr;
+  for (size_t i = 0; i < gpu_allocators_.size(); ++i) {
+    if (gpu_allocators_[i] != nullptr) {
+      se = GPUMachineManager()->ExecutorForDevice(i).ValueOrDie();
+      break;
+    }
+  }
+
+  CHECK_NE(nullptr, se);
+
   while (static_cast<int>(cuda_host_allocators_.size()) <= numa_node) {
-    // CUDAHost alloc the same across all gpus, so just get the
-    // executor for the first device.
-    gpu::Platform* gpu_platform = GPUMachineManager();
-    gpu::StreamExecutor* se = gpu_platform->ExecutorForDevice(0).ValueOrDie();
-    CHECK(se);
     Allocator* allocator = nullptr;
     static constexpr bool kCudaHostMemoryUseBFC = true;
     if (kCudaHostMemoryUseBFC) {
diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.cc b/tensorflow/core/common_runtime/simple_graph_execution_state.cc
index 15a9baf52fc..365120dd0e7 100644
--- a/tensorflow/core/common_runtime/simple_graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/simple_graph_execution_state.cc
@@ -44,6 +44,7 @@ SimpleGraphExecutionState::SimpleGraphExecutionState(
     const SimpleGraphExecutionStateOptions& options)
     : device_set_(options.device_set),
       session_options_(options.session_options),
+      costs_(true /*is_global*/),
       flib_def_(
           new FunctionLibraryDefinition(OpRegistry::Global(), func_def_lib)),
       graph_(nullptr) {
@@ -53,6 +54,7 @@ SimpleGraphExecutionState::SimpleGraphExecutionState(
 
 SimpleGraphExecutionState::~SimpleGraphExecutionState() {
   mutex_lock l(mu_);
+  node_name_to_cost_id_map_.clear();
   delete graph_;
 }
 
@@ -178,6 +180,10 @@ Status SimpleGraphExecutionState::InitBaseGraph(
   GraphConstructorOptions opts;
   TF_RETURN_IF_ERROR(
       ConvertGraphDefToGraph(opts, original_graph_def_, new_graph.get()));
+  for (const Node* n : new_graph->nodes()) {
+    VLOG(2) << "Mapping " << n->name() << " to " << n->cost_id();
+    node_name_to_cost_id_map_[n->name()] = n->cost_id();
+  }
   if (session_options_ &&
       session_options_->config.graph_options().place_pruned_graph()) {
     // Rewrite the graph before placement.
@@ -189,10 +195,15 @@ Status SimpleGraphExecutionState::InitBaseGraph(
   // Save stateful placements before placing.
   RestoreStatefulNodes(new_graph.get());
 
+  CostModel costs(true /*is_global*/);
+  costs_.InitFromGraph(*new_graph.get());
+  costs.MergeFromGlobal(costs_);
+
   GraphOptimizationPassOptions optimization_options;
   optimization_options.session_options = session_options_;
   optimization_options.graph = &new_graph;
   optimization_options.flib_def = flib_def_.get();
+  optimization_options.cost_model = &costs;
 
   TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
       OptimizationPassRegistry::PRE_PLACEMENT, optimization_options));
@@ -209,6 +220,31 @@ Status SimpleGraphExecutionState::InitBaseGraph(
   return Status::OK();
 }
 
+void SimpleGraphExecutionState::UpdateCostsFromStats(const StepStats& ss) {
+  mutex_lock lock(mu_);  // costs_ is guarded by mu_
+  costs_.MergeFromStats(node_name_to_cost_id_map_, ss);
+}
+
+void SimpleGraphExecutionState::MergeCostsFromGlobal(CostModel* costs) {
+  mutex_lock lock(mu_);  // costs_ is guarded by mu_
+  costs->MergeFromGlobal(costs_);
+}
+
+Status SimpleGraphExecutionState::GlobalNodeDefByName(const string& name,
+                                                      NodeDef* out) {
+  // Take mu_ before touching the name map, not only around the graph_ lookup.
+  mutex_lock l(mu_);  // could use reader lock
+  const auto iter = node_name_to_cost_id_map_.find(name);
+  if (iter != node_name_to_cost_id_map_.end()) {
+    const Node* node = graph_->FindNodeId(iter->second);
+    if (node) {
+      *out = node->def();
+      return Status::OK();
+    }
+  }
+  return errors::NotFound("Node name: ", name);
+}
+
 Status SimpleGraphExecutionState::BuildGraph(
     const BuildGraphOptions& options, std::unique_ptr<SimpleClientGraph>* out) {
   VLOG(1) << "BuildGraph";
@@ -234,10 +270,14 @@ Status SimpleGraphExecutionState::BuildGraph(
   std::unique_ptr<FunctionLibraryDefinition> flib(
       new FunctionLibraryDefinition(*flib_def_));
 
+  // TODO(andydavis): Clarify optimization pass requirements around CostModel.
+  CostModel costs(true /*is_global*/);
+  costs.MergeFromGlobal(costs_);
   GraphOptimizationPassOptions optimization_options;
   optimization_options.session_options = session_options_;
   optimization_options.graph = &ng;
   optimization_options.flib_def = flib.get();
+  optimization_options.cost_model = &costs;
 
   TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
       OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, optimization_options));
diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.h b/tensorflow/core/common_runtime/simple_graph_execution_state.h
index 595da551d43..5fe16f0f421 100644
--- a/tensorflow/core/common_runtime/simple_graph_execution_state.h
+++ b/tensorflow/core/common_runtime/simple_graph_execution_state.h
@@ -119,6 +119,22 @@ class SimpleGraphExecutionState {
   // execution, e.g. a send, recv or feed node.
   Status GlobalNodeDefByName(const string& name, NodeDef* out);
 
+  // Sums execution statistics in "ss" into the CostModel.
+  void UpdateCostsFromStats(const StepStats& ss);
+
+  Microseconds TimeEstimate(const Node* n) {
+    mutex_lock lock(mu_);  // a reader lock would suffice here
+    return costs_.TimeEstimate(n);
+  }
+
+  Bytes SizeEstimate(const Node* n, int output_slot) {
+    mutex_lock lock(mu_);  // a reader lock would suffice here
+    return costs_.SizeEstimate(n, output_slot);
+  }
+
+  // Merge the cost model maintained by this graph_execution_state to 'costs'.
+  void MergeCostsFromGlobal(CostModel* costs);
+
   // The graph returned by BuildGraph may contain only the pruned
   // graph, whereas some clients may want access to the full graph.
   const Graph* full_graph() {
@@ -162,6 +178,11 @@ class SimpleGraphExecutionState {
   const DeviceSet* device_set_;            // Not owned
   const SessionOptions* session_options_;  // Not owned
 
+  CostModel costs_ GUARDED_BY(mu_);
+
+  // Map from node name to cost id for the nodes of the full graph.
+  NodeNameToCostIdMap node_name_to_cost_id_map_;
+
   // 'flib_def_' is initialized from the initial graph def's library,
   // and may be updated by a graph optimization pass.
   std::unique_ptr<FunctionLibraryDefinition> flib_def_;
diff --git a/tensorflow/core/common_runtime/simple_placer.cc b/tensorflow/core/common_runtime/simple_placer.cc
index 6e177da57fc..552265a2765 100644
--- a/tensorflow/core/common_runtime/simple_placer.cc
+++ b/tensorflow/core/common_runtime/simple_placer.cc
@@ -42,7 +42,7 @@ std::vector<Device*> FilterSupportedDevices(
     const std::vector<Device*>& devices,
     const DeviceTypeVector& supported_device_types) {
   std::vector<Device*> filtered_devices;
-  for (DeviceType d : supported_device_types) {
+  for (const DeviceType& d : supported_device_types) {
     for (Device* device : devices) {
       if (DeviceType(device->attributes().device_type()) == d) {
         filtered_devices.emplace_back(device);
@@ -238,11 +238,15 @@ class ColocationGraph {
       // members_[old_root].supported_device_types.
       MergeSupportedDevices(&members_[new_root].supported_device_types,
                             members_[old_root].supported_device_types);
-      if (members_[x_root].supported_device_types.size() == 0) {
+      if (members_[new_root].supported_device_types.size() == 0) {
+        string debug_info;
+        AddDebugInfo(x_root, &debug_info);
+        AddDebugInfo(y_root, &debug_info);
         return errors::InvalidArgument(
             "Cannot colocate nodes '", x.name(), "' and '", y.name(),
             "' because no device type supports both of those nodes and the "
-            "other nodes colocated with them");
+            "other nodes colocated with them.",
+            debug_info);
       }
     }
     return Status::OK();
@@ -495,7 +499,7 @@ class ColocationGraph {
                                 "' does not match any device");
       }
 
-      for (DeviceType d : member->supported_device_types) {
+      for (const DeviceType& d : member->supported_device_types) {
         if (DeviceType(assigned_device->attributes().device_type()) == d) {
           return Status::OK();
         }
@@ -545,9 +549,9 @@ class ColocationGraph {
     target->clear();
 
     // Iterate in priority order.
-    for (DeviceType device_type : temp) {
+    for (const DeviceType& device_type : temp) {
       bool found = false;
-      for (DeviceType other_device_type : other) {
+      for (const DeviceType& other_device_type : other) {
         if (device_type == other_device_type) {
           found = true;
           break;
diff --git a/tensorflow/core/common_runtime/simple_placer_test.cc b/tensorflow/core/common_runtime/simple_placer_test.cc
index e17a40d7209..148fc973ddd 100644
--- a/tensorflow/core/common_runtime/simple_placer_test.cc
+++ b/tensorflow/core/common_runtime/simple_placer_test.cc
@@ -689,8 +689,9 @@ TEST_F(SimplePlacerTest,
   Status s = Place(&g);
   EXPECT_TRUE(
       StringPiece(s.error_message())
-          .contains("Cannot assign a device to node 'var3': Node had no "
-                    "OpKernel registered"));
+          .contains("Cannot colocate nodes 'var3' and 'assign3' because no "
+                    "device type supports both of those nodes and the other "
+                    "nodes colocated with them."));
 }
 
 TEST_F(SimplePlacerTest, TestColocationAndReferenceConnections) {
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 6d31ae3ab82..b3668fb8924 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -54,9 +54,9 @@ namespace tensorflow {
 namespace {
 // A little bit of per-step state.
 struct PerStepState {
+  bool collect_timeline = false;  // set when the step asks for FULL_TRACE
   Microseconds start_micros = Microseconds(0);
   Microseconds end_micros = Microseconds(0);
-  std::vector<StepStats> step_stats;  // per partition
 };
 
 // A session encapsulates a graph computation (resource allocation,
@@ -522,6 +522,10 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
 
   // Prepares a number of calls to workers. One call per partition.
   ExecutorOpts exec_opts;
+  if (pss->collect_timeline) {
+    exec_opts.set_record_timeline(true);
+  }
+
   const int num = partitions_.size();
   RunManyGraphs calls(num);
 
@@ -597,8 +601,9 @@ Status MasterSession::ReffedClientGraph::RunPartitions(
           break;
         }
       }
-      if (calls.get(i)->resp.has_step_stats()) {
-        pss->step_stats[i].Swap(calls.get(i)->resp.mutable_step_stats());
+      if (pss->collect_timeline && calls.get(i)->resp.has_step_stats()) {
+        resp->mutable_metadata()->mutable_step_stats()->MergeFrom(
+            calls.get(i)->resp.step_stats());
       }
     }
   }
@@ -953,6 +958,8 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts,
   const uint64 step_id = (random::New64() & ((1uLL << 56) - 1)) | (1uLL << 56);
   TRACEPRINTF("stepid %llu", step_id);
 
+  pss.collect_timeline = req->options().trace_level() == RunOptions::FULL_TRACE;
+
   TF_RETURN_IF_ERROR(rcg->RunPartitions(env_, step_id, count,
                                         execution_state_.get(), &pss, opts,
                                         *req, resp, cancellation_manager_));
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
index e335785bed6..886f04a7561 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc
@@ -162,6 +162,8 @@ Status GrpcSession::Run(const RunOptions& run_options,
   RunStepRequest req;
   RunStepResponse resp;
 
+  *req.mutable_options() = run_options;
+
   for (const auto& it : inputs) {
     Tensor input_tensor = it.second;
     auto feed = req.add_feed();
@@ -206,6 +208,10 @@ Status GrpcSession::Run(const RunOptions& run_options,
     (*outputs)[fetch_it->second] = output;
   }
 
+  if (run_metadata) {
+    run_metadata->Swap(resp.mutable_metadata());
+  }
+
   return Status::OK();
 }
 
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
index 780db8bd2f8..86a09551fdc 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session_test.cc
@@ -75,6 +75,9 @@ static SessionOptions Options(const string& target, int placement_period) {
   // string.
   options.target = strings::StrCat("grpc://", target);
   options.config.set_placement_period(placement_period);
+  options.config.mutable_graph_options()
+      ->mutable_optimizer_options()
+      ->set_opt_level(OptimizerOptions::L0);
   return options;
 }
 
@@ -307,9 +310,29 @@ TEST(GrpcSessionTest, MultiDevices) {
         TF_CHECK_OK(session->Create(def));
         {
           std::vector<Tensor> outputs;
-          TF_CHECK_OK(session->Run({}, {c->name()}, {}, &outputs));
+          RunOptions options;
+          options.set_trace_level(RunOptions::FULL_TRACE);
+          RunMetadata metadata;
+          TF_CHECK_OK(
+              session->Run(options, {}, {c->name()}, {}, &outputs, &metadata));
           ASSERT_EQ(1, outputs.size());
           IsSingleFloatValue(outputs[0], 6.0 * kSize);
+
+          const StepStats& ss = metadata.step_stats();
+          // NOTE(mrry): We only assert that `c` is placed correctly,
+          // because the current placement algorithm will move its
+          // inputs to be colocated with it, when it is the sole
+          // consumer.
+          bool c_placed_correctly = false;
+          for (const auto& dev : ss.dev_stats()) {
+            for (const auto& node : dev.node_stats()) {
+              if (node.node_name() == c->name() &&
+                  dev.device() == c_dev.name()) {
+                c_placed_correctly = true;
+              }
+            }
+          }
+          ASSERT_TRUE(c_placed_correctly);
         }
         TF_CHECK_OK(session->Close());
       }
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
index e897bfc477b..4a5d54915eb 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc
@@ -325,7 +325,10 @@ class GrpcWorkerService : public AsyncServiceInterface {
       return;
     }
     StepStatsCollector* collector = nullptr;
-    // TODO(mrry): Collect results from a profiler if available.
+    if (call->request.exec_opts().record_timeline()) {
+      collector = new StepStatsCollector(call->response.mutable_step_stats());
+      // TODO(mrry,pbar): GPU tracing for distributed steps.
+    }
     CancellationManager* cm = new CancellationManager;
     call->SetCancelCallback([this, cm, step_id]() {
       cm->StartCancel();
@@ -340,7 +343,8 @@ class GrpcWorkerService : public AsyncServiceInterface {
     }
     env_->graph_mgr->ExecuteAsync(
         call->request.graph_handle(), step_id, call->request.exec_opts(),
-        collector, cm, in, out, [this, call, cm, out, token](Status s) {
+        collector, cm, in, out,
+        [this, call, cm, out, token, collector](Status s) {
           call->ClearCancelCallback();
           {
             mutex_lock l(mu_);
@@ -359,6 +363,7 @@ class GrpcWorkerService : public AsyncServiceInterface {
               val.AsProtoField(proto);
             }
           }
+          delete collector;
           delete out;
           call->SendResponse(ToGrpcStatus(s));
         });
diff --git a/tensorflow/core/framework/cost_graph.proto b/tensorflow/core/framework/cost_graph.proto
index 62980db6da4..192d2e31f0a 100644
--- a/tensorflow/core/framework/cost_graph.proto
+++ b/tensorflow/core/framework/cost_graph.proto
@@ -39,6 +39,9 @@ message CostGraphDef {
     // Temporary memory used by this node.
     int64 temporary_memory_size = 6;
 
+    // Estimate of the computational cost of this node.
+    int64 compute_cost = 9;
+
     // If true, the output is permanent: it can't be discarded, because this
     // node is part of the "final output". Nodes may depend on final nodes.
     bool is_final = 7;
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index 83676a90c51..bedc85ab4e7 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -861,11 +861,11 @@ string DebugString(const GraphDef& instantiated_func_def) {
 
 string DebugStringWhole(const GraphDef& gdef) {
   string ret;
-  for (auto fdef : gdef.library().function()) {
+  for (const auto& fdef : gdef.library().function()) {
     strings::StrAppend(&ret, Print(fdef));
   }
   strings::StrAppend(&ret, "\n");
-  for (auto ndef : gdef.node()) {
+  for (const auto& ndef : gdef.node()) {
     strings::StrAppend(&ret, Print(ndef), "\n");
   }
   return ret;
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h
index 4a9c62f08ce..07a9451e635 100644
--- a/tensorflow/core/framework/function.h
+++ b/tensorflow/core/framework/function.h
@@ -33,7 +33,6 @@ limitations under the License.
 namespace tensorflow {
 
 class CancellationManager;
-class Node;
 class OpKernel;
 class ResourceMgr;
 
diff --git a/tensorflow/core/framework/function_testlib.cc b/tensorflow/core/framework/function_testlib.cc
index 900ceed1a59..47db0f03391 100644
--- a/tensorflow/core/framework/function_testlib.cc
+++ b/tensorflow/core/framework/function_testlib.cc
@@ -31,11 +31,11 @@ GraphDef GDef(gtl::ArraySlice<NodeDef> nodes,
   VersionDef* versions = g.mutable_versions();
   versions->set_producer(TF_GRAPH_DEF_VERSION);
   versions->set_min_consumer(TF_GRAPH_DEF_VERSION_MIN_CONSUMER);
-  for (auto n : nodes) {
+  for (const auto& n : nodes) {
     *(g.add_node()) = n;
   }
   auto lib = g.mutable_library();
-  for (auto f : funcs) {
+  for (const auto& f : funcs) {
     *(lib->add_function()) = f;
   }
   return g;
@@ -49,7 +49,7 @@ NodeDef NDef(const string& name, const string& op,
   NodeDef n;
   n.set_name(name);
   n.set_op(op);
-  for (auto in : inputs) n.add_input(in);
+  for (const auto& in : inputs) n.add_input(in);
   n.set_device(device);
   for (auto na : attrs) n.mutable_attr()->insert({na.first, na.second.proto});
   return n;
diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc
index 5717488b1cb..c36e6dd653b 100644
--- a/tensorflow/core/framework/op_def_util.cc
+++ b/tensorflow/core/framework/op_def_util.cc
@@ -60,7 +60,7 @@ Status AllowedTypeValue(DataType dt, const OpDef::AttrDef& attr) {
 
 Status AllowedStringValue(const string& str, const OpDef::AttrDef& attr) {
   const AttrValue& allowed_values(attr.allowed_values());
-  for (auto allowed : allowed_values.list().s()) {
+  for (const auto& allowed : allowed_values.list().s()) {
     if (str == allowed) {
       return Status::OK();
     }
diff --git a/tensorflow/core/framework/op_kernel_test.cc b/tensorflow/core/framework/op_kernel_test.cc
index db4b6037ef0..b4556c9272d 100644
--- a/tensorflow/core/framework/op_kernel_test.cc
+++ b/tensorflow/core/framework/op_kernel_test.cc
@@ -381,7 +381,7 @@ class OpKernelBuilderTest : public ::testing::Test {
     DeviceTypeVector devices;
     TF_EXPECT_OK(SupportedDeviceTypesForNode(DeviceTypes(), def, &devices));
     bool found = false;
-    for (DeviceType dt : devices) {
+    for (const DeviceType& dt : devices) {
       if (dt == device_type) {
         found = true;
       }
@@ -414,7 +414,7 @@ class OpKernelBuilderTest : public ::testing::Test {
       DeviceTypeVector devices;
       if (errors::IsNotFound(status)) {
         TF_EXPECT_OK(SupportedDeviceTypesForNode(DeviceTypes(), def, &devices));
-        for (DeviceType dt : devices) {
+        for (const DeviceType& dt : devices) {
           EXPECT_NE(dt, device_type);
         }
       } else {
diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h
index 11468c1b482..19e73b9087b 100644
--- a/tensorflow/core/framework/tensor_shape.h
+++ b/tensorflow/core/framework/tensor_shape.h
@@ -200,7 +200,7 @@ class TensorShape {
   DataType data_type() const { return static_cast<DataType>(buf()[13]); }
   void set_data_type(DataType dt) {
     // We only have 8 bits available to store DataType, so make sure it fits
-    DCHECK_LT(static_cast<uint32>(dt), 256);
+    DCHECK_LT(static_cast<uint32>(dt), 256u);
     buf()[13] = static_cast<uint8>(dt);
   }
 
diff --git a/tensorflow/core/framework/tensor_slice.h b/tensorflow/core/framework/tensor_slice.h
index 8c4a2adeb37..fca40e0894e 100644
--- a/tensorflow/core/framework/tensor_slice.h
+++ b/tensorflow/core/framework/tensor_slice.h
@@ -94,7 +94,9 @@ class TensorSlice {
   }
 
   // If we have a full slice along dimension "d".
-  bool IsFullAt(int d) const { return lengths_[d] < 0; }
+  bool IsFullAt(int d) const {
+    return lengths_[d] == kFullExtent && starts_[d] == 0;
+  }
 
   // If this is a full slice, i.e. IsFullAt(d) for every d.
   bool IsFull() const;
diff --git a/tensorflow/core/framework/tensor_slice_test.cc b/tensorflow/core/framework/tensor_slice_test.cc
index e26c8409980..bb32fa07249 100644
--- a/tensorflow/core/framework/tensor_slice_test.cc
+++ b/tensorflow/core/framework/tensor_slice_test.cc
@@ -273,8 +273,8 @@ TEST(TensorSliceTest, Deserialization) {
   TensorSlice ts3(proto3);
 
   // Both serializations should be interpreted the same.
-  EXPECT_EQ("0,5:0,10:14,1:-:-", ts2.DebugString());
-  EXPECT_EQ("0,5:0,10:14,1:-:-", ts3.DebugString());
+  EXPECT_EQ("0,5:0,10:14,1:1,-1:-", ts2.DebugString());
+  EXPECT_EQ("0,5:0,10:14,1:1,-1:-", ts3.DebugString());
 }
 
 TEST(TensorSliceTest, UpdateToCover) {
diff --git a/tensorflow/core/graph/optimizer_cse_test.cc b/tensorflow/core/graph/optimizer_cse_test.cc
index 0841bac93cd..1091af4e451 100644
--- a/tensorflow/core/graph/optimizer_cse_test.cc
+++ b/tensorflow/core/graph/optimizer_cse_test.cc
@@ -326,7 +326,7 @@ TEST_F(OptimizerCSETest, Constant_Dedup) {
 
   // A graph contains a bunch of constants.
   Graph g(OpRegistry::Global());
-  for (auto val : {a, b, c, d, d, c, b, a}) {
+  for (const auto& val : {a, b, c, d, d, c, b, a}) {
     test::graph::Constant(&g, val);  // Node name is n/_0, n/_1, ...
   }
   GraphDef gdef;
diff --git a/tensorflow/core/graph/quantize_training.cc b/tensorflow/core/graph/quantize_training.cc
index 8521dff6fa2..930d7bd15f6 100644
--- a/tensorflow/core/graph/quantize_training.cc
+++ b/tensorflow/core/graph/quantize_training.cc
@@ -74,7 +74,7 @@ inline bool IsGradientNode(const Graph* graph, const Node* node) {
 // Returns true if the root tensor op type is known, false otherwise.
 bool FindType(const Graph* graph, const Node* node, bool* signed_input,
               bool* range_given, float* input_min, float* input_max) {
-  const string src_op = node->type_string();
+  const string& src_op = node->type_string();
   if (src_op == "Const" || src_op == "Variable") {
     *signed_input = true;
     *range_given = false;
diff --git a/tensorflow/core/graph/shape_refiner.cc b/tensorflow/core/graph/shape_refiner.cc
index e45e4e0d633..66a5202a147 100644
--- a/tensorflow/core/graph/shape_refiner.cc
+++ b/tensorflow/core/graph/shape_refiner.cc
@@ -113,6 +113,36 @@ Status ShapeRefiner::AddNode(const Node* node) {
   return Status::OK();
 }
 
+Status ShapeRefiner::SetShape(const Node* node, int output_port,
+                              shape_inference::ShapeHandle shape) {
+  auto c = GetContext(node);  // Context recorded for 'node', if any.
+  if (c == nullptr) {
+    return errors::Internal("Could not find context for ", node->name());
+  }
+  // Reject ports outside [0, node->num_outputs()).
+  if (output_port < 0 || output_port >= node->num_outputs()) {
+    return errors::InvalidArgument(
+        "output_port '", output_port, "' is out of range, ", "node '",
+        node->name(), "' has ", node->num_outputs(), " outputs");
+  }
+  // Check compatibility: Merge() is called only for its status here, so an
+  // incompatible 'shape' returns an error and the merged result is discarded.
+  shape_inference::ShapeHandle existing_shape = c->output(output_port);
+  shape_inference::ShapeHandle unused;
+  TF_RETURN_IF_ERROR(c->Merge(existing_shape, shape, &unused));
+  // Store 'shape' exactly as given (not the merged shape computed above).
+  c->set_output(output_port, shape);
+
+  // TODO(vrv): Do we need to propagate the new shape through all
+  // consumers that change their outputs?  At the moment, python
+  // does not do this, but this seems like a nice feature.
+
+  // TODO(vrv): We might need to keep track of the fact that the
+  // existing shape is invalidated, in case we need to propagate
+  // this information to remote workers.
+  return Status::OK();
+}
+
 Status ShapeRefiner::ConstantValue(const Node* node, Tensor* tensor_storage,
                                    const Tensor** input_tensor) const {
   *input_tensor = nullptr;
diff --git a/tensorflow/core/graph/shape_refiner.h b/tensorflow/core/graph/shape_refiner.h
index 21551903c03..63838e1cfdd 100644
--- a/tensorflow/core/graph/shape_refiner.h
+++ b/tensorflow/core/graph/shape_refiner.h
@@ -46,6 +46,14 @@ class ShapeRefiner {
   //  - The shape inference function returns an error.
   Status AddNode(const Node* node);
 
+  // Sets 'node's 'output_port' output to have shape 'shape'. The stored
+  // shape is 'shape' itself, not its merge with the previous output shape.
+  // Returns an error if 'node' was not previously added to this
+  // object, if 'output_port' is invalid, or if 'shape' is
+  // not compatible with the existing shape of the output.
+  Status SetShape(const Node* node, int output_port,
+                  shape_inference::ShapeHandle shape);
+
   // Returns the InferenceContext for 'node', if present.
   shape_inference::InferenceContext* GetContext(const Node* node) const {
     auto it = node_to_context_.find(node);
diff --git a/tensorflow/core/graph/shape_refiner_test.cc b/tensorflow/core/graph/shape_refiner_test.cc
index 94cd6dc74a9..ac4cf94546c 100644
--- a/tensorflow/core/graph/shape_refiner_test.cc
+++ b/tensorflow/core/graph/shape_refiner_test.cc
@@ -92,6 +92,33 @@ TEST(ShapeRefinerTest, BadShapes) {
   ASSERT_EQ("Dimensions must be equal, but are 1 and 2", s.error_message());
 }
 
+TEST(ShapeRefinerTest, SetShape) {
+  ShapeRefiner m;
+  // 'a' is a constant built from a nested initializer list of 2 rows x 1 col.
+  Scope root = Scope::NewRootScope();
+  auto a = ops::Const(root, {{1.0f}, {2.0f}});
+
+  TF_ASSERT_OK(m.AddNode(a.node()));
+  // A compatible shape is accepted and stored as given, i.e. "[2,?]".
+  auto ic = m.GetContext(a.node());
+  ASSERT_NE(nullptr, ic);
+  shape_inference::ShapeHandle h = ic->MakeShape({2, ic->UnknownDim()});
+  TF_ASSERT_OK(m.SetShape(a.node(), 0, h));
+  EXPECT_SHAPE("[2,?]", m, a, 0);
+
+  // Out-of-range output ports (too large and negative) must be rejected.
+  ASSERT_FALSE(m.SetShape(a.node(), 1, h).ok());
+  ASSERT_FALSE(m.SetShape(a.node(), -1, h).ok());
+
+  auto b = ops::Const(root, {{1.0f}, {2.0f}});
+  // 'b' was never passed to AddNode(), so SetShape() on it must fail.
+  ASSERT_FALSE(m.SetShape(b.node(), 0, h).ok());
+
+  // Set an incompatible shape (first dimension 3 vs 2); this must fail too.
+  h = ic->MakeShape({3, ic->UnknownDim()});
+  ASSERT_FALSE(m.SetShape(a.node(), 0, h).ok());
+}
+
 TEST(ShapeRefinerTest, PropagateConstants) {
   // Reduction dimension is a variable, so we don't know its value.
   // So the output shape value is unknown (though its rank is known).
diff --git a/tensorflow/core/graph/subgraph.cc b/tensorflow/core/graph/subgraph.cc
index b6a32b9ea7f..c2978bbcf4a 100644
--- a/tensorflow/core/graph/subgraph.cc
+++ b/tensorflow/core/graph/subgraph.cc
@@ -235,7 +235,15 @@ Status RewriteGraphForExecution(
         "Must specify at least one target to fetch or execute.");
   }
 
-  std::unordered_set<string> endpoints(fed_outputs.begin(), fed_outputs.end());
+  std::unordered_set<string> endpoints;
+  for (const string& endpoint_name : fed_outputs) {
+    auto result = endpoints.insert(endpoint_name);
+    if (!result.second) {
+      return errors::InvalidArgument("Endpoint \"", endpoint_name,
+                                     "\" fed more than once.");
+    }
+  }
+
   for (const auto& fetch : fetch_outputs) {
     if (endpoints.count(fetch) > 0) {
       return errors::InvalidArgument(fetch, " is both fed and fetched.");
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 0b85a01a526..6a1967eaf57 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -491,6 +491,27 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "conv_ops_test",  # Presumably builds conv_ops_test.cc -- confirm.
+    size = "small",
+    deps = [
+        ":conv_ops",
+        ":image",
+        ":ops_testutil",
+        ":ops_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_cc_test(
     name = "example_parsing_ops_test",
     size = "large",
@@ -1325,6 +1346,7 @@ tf_kernel_library(
     hdrs = [
         "conv_grad_ops.h",
         "deep_conv2d.h",
+        "gemm_functors.h",
         "winograd_transform.h",
     ],
     prefix = "conv_ops",
@@ -1332,6 +1354,7 @@ tf_kernel_library(
         ":bounds_check",
         ":conv_2d",
         ":conv_3d",
+        ":image_resizer_state",
         ":ops_util",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
@@ -1958,6 +1981,7 @@ filegroup(
         "control_flow_ops.h",
         "conv_2d.h",
         "conv_ops.h",
+        "depthwise_conv_op.h",
         "image_resizer_state.h",
         "maxpooling_op.h",
         "pad_op.h",
@@ -1998,6 +2022,7 @@ filegroup(
         "cwise_op_div.cc",
         "cwise_op_equal_to.cc",
         "cwise_op_exp.cc",
+        "cwise_op_floor.cc",
         "cwise_op_greater.cc",
         "cwise_op_inverse.cc",
         "cwise_op_isfinite.cc",
@@ -2017,6 +2042,7 @@ filegroup(
         "cwise_op_tanh.cc",
         "deep_conv2d.cc",
         "deep_conv2d.h",
+        "depthwise_conv_op.cc",
         "dynamic_partition_op.cc",
         "winograd_transform.h",
         ":android_extended_ops_headers",
diff --git a/tensorflow/core/kernels/argmax_op.cc b/tensorflow/core/kernels/argmax_op.cc
index 595bd7bd5e4..2f92a2da9f8 100644
--- a/tensorflow/core/kernels/argmax_op.cc
+++ b/tensorflow/core/kernels/argmax_op.cc
@@ -67,7 +67,7 @@ class ArgOp : public OpKernel {
                                 input.shape().DebugString()));
 
     TensorShape output_shape;
-    TensorShape input_shape = input.shape();
+    const TensorShape& input_shape = input.shape();
     for (int d = 0; d < input_dims - 1; ++d) {
       output_shape.AddDim(input_shape.dim_size((d < dim) ? d : d + 1));
     }
diff --git a/tensorflow/core/kernels/attention_ops.cc b/tensorflow/core/kernels/attention_ops.cc
index 695068d3150..cc8f122cab3 100644
--- a/tensorflow/core/kernels/attention_ops.cc
+++ b/tensorflow/core/kernels/attention_ops.cc
@@ -41,7 +41,7 @@ class ExtractGlimpseOp : public OpKernel {
   // depth).
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
-    const TensorShape input_shape = input.shape();
+    const TensorShape& input_shape = input.shape();
     const int32 num_dims = input_shape.dims();
     OP_REQUIRES(
         context, num_dims == 4,
diff --git a/tensorflow/core/kernels/candidate_sampler_ops.cc b/tensorflow/core/kernels/candidate_sampler_ops.cc
index d64dca3d0b5..6aa9059dc70 100644
--- a/tensorflow/core/kernels/candidate_sampler_ops.cc
+++ b/tensorflow/core/kernels/candidate_sampler_ops.cc
@@ -190,7 +190,7 @@ class ComputeAccidentalHitsOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& in_true_candidates = context->input(0);
-    TensorShape in_true_candidates_shape = in_true_candidates.shape();
+    const TensorShape& in_true_candidates_shape = in_true_candidates.shape();
     OP_REQUIRES(context, TensorShapeUtils::IsMatrix(in_true_candidates_shape) &&
                              in_true_candidates_shape.dim_size(1) == num_true_,
                 errors::InvalidArgument(
diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc
index 0e16c06a87f..29f39a72f39 100644
--- a/tensorflow/core/kernels/constant_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc
@@ -37,15 +37,12 @@ struct scalar_const_op {
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_const_op(const T* v) : val(v) {}
 
-  template <typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(Index,
-                                                           Index = 0) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()() const {
     return *val;
   }
 
-  template <typename Index, typename PacketType = Packet>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType
-      packetOp(Index, Index = 0) const {
+  template <typename PacketType = Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetOp() const {
     return internal::pset1<PacketType>(*val);
   }
 };
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index d09db3dc15f..858be520b07 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_KERNELS_CONV_OPS_H_
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/util/tensor_format.h"
 
 #if GOOGLE_CUDA
@@ -38,6 +39,16 @@ class LaunchConv2DOp {
               TensorFormat data_format);
 };
 
+// Resource that keeps a persistent scratch array of 'size' elements of T.
+template <class T, size_t size>
+struct Im2ColBufferResource : public ResourceBase {
+  // This mutex ensures that only a single operation at a time is able to use
+  // the buffer memory held by this resource; users must lock it themselves.
+  mutex mu;
+  T data[size];  // 'size' counts elements of T, not bytes.
+  string DebugString() { return "Im2ColBufferResource"; }
+};
+
 #ifdef GOOGLE_CUDA
 template <typename T>
 class LaunchConv2DOp<Eigen::GpuDevice, T> {
diff --git a/tensorflow/core/kernels/conv_ops_fused.cc b/tensorflow/core/kernels/conv_ops_fused.cc
new file mode 100644
index 00000000000..865021405ac
--- /dev/null
+++ b/tensorflow/core/kernels/conv_ops_fused.cc
@@ -0,0 +1,486 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Implements convolution operations with other kernels baked into the
+// processing, to optimize latency and memory usage.
+
+#include <string.h>
+#include <map>
+#include <vector>
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_slice.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/conv_ops.h"
+#include "tensorflow/core/kernels/gemm_functors.h"
+#include "tensorflow/core/kernels/image_resizer_state.h"
+#include "tensorflow/core/util/mirror_pad_mode.h"
+#include "tensorflow/core/util/padding.h"
+#include "tensorflow/core/util/tensor_format.h"
+
+namespace tensorflow {
+
+namespace {
+
+// Combines bilinear resizing and mirror padding into the im2col transformation
+// stage of convolution, and multiplies each chunk of patches with a GEMM.
+template <class T1, class T2, class T3, class TGemmFunctor>
+class FusedResizeAndPadConvFunctor {
+ public:
+  void operator()(OpKernelContext* context, const Tensor& input,
+                  int input_batches, int resized_height, int resized_width,
+                  int padded_height, int padded_width, int input_depth,
+                  const T2* filter_data, int filter_height, int filter_width,
+                  int filter_count, int stride_rows, int stride_cols,
+                  Padding padding, T3* output_data, int output_height,
+                  int output_width, const ImageResizerState& st,
+                  int top_padding, int bottom_padding, int left_padding,
+                  int right_padding, int pad_offset) {
+    if ((input_batches <= 0) || (padded_width <= 0) || (padded_height <= 0) ||
+        (input_depth <= 0)) {
+      LOG(WARNING) << "Conv2D was called with bad input dimensions: "
+                   << input_batches << ", " << padded_height << ", "
+                   << padded_width << ", " << input_depth;
+      return;  // NOTE(review): no error status is set on this path.
+    }
+    if ((filter_width <= 0) || (filter_height <= 0) || (filter_count <= 0)) {
+      LOG(WARNING) << "Conv2D was called with bad filter dimensions: "
+                   << filter_width << ", " << filter_height << ", "
+                   << filter_count;
+      return;
+    }
+    if ((output_width <= 0) || (output_height <= 0)) {
+      LOG(WARNING) << "Conv2D was called with bad output width or height: "
+                   << output_width << ", " << output_height;
+      return;
+    }
+
+    // These calculations define how the patches will be positioned within the
+    // input image. The actual definitions are quite complex, and rely on the
+    // previously-calculated output size.
+    int filter_left_offset;
+    int filter_top_offset;
+    if (padding == VALID) {
+      filter_left_offset =
+          ((output_width - 1) * stride_cols + filter_width - padded_width + 1) /
+          2;
+      filter_top_offset = ((output_height - 1) * stride_rows + filter_height -
+                           padded_height + 1) /
+                          2;
+    } else {
+      filter_left_offset =
+          ((output_width - 1) * stride_cols + filter_width - padded_width) / 2;
+      filter_top_offset =
+          ((output_height - 1) * stride_rows + filter_height - padded_height) /
+          2;
+    }
+
+    // The im2col buffer has # of patches rows, and # of filters cols.
+    // It's laid out like this, in row major order in memory:
+    //        < filter value count >
+    //   ^   +---------------------+
+    // patch |                     |
+    // count |                     |
+    //   v   +---------------------+
+    // Each patch row contains a filter_width x filter_height patch of the
+    // input, with the depth channel as the most contiguous in memory, followed
+    // by the width, then the height. This is the standard memory order in the
+    // image world if it helps to visualize it.
+    const int filter_value_count = filter_width * filter_height * input_depth;
+
+    // We don't want to allocate a buffer to hold all the patches if the size is
+    // going to be extremely large, so break it into chunks if it's bigger than
+    // a limit (16 megabytes of patch data here). Each chunk is processed
+    // serially, so the buffer is refilled and reused for the next chunk.
+    // NOTE(review): the resource below sizes its array in elements, not bytes.
+    const size_t max_chunk_size = (16 * 1024 * 1024);
+    OP_REQUIRES(context, (filter_value_count * sizeof(T1)) <= max_chunk_size,
+                errors::InvalidArgument("Im2Col patch too large for buffer"));
+    const size_t patches_per_chunk =
+        max_chunk_size / (filter_value_count * sizeof(T1));
+    // Because memory allocation is very expensive on mobile platforms, try to
+    // allocate a persistent buffer that will be kept around between calls. We
+    // use TensorFlow's resource management to ensure that the memory will be
+    // released when the session is over.
+    Im2ColBufferResource<T1, max_chunk_size>* im2col_buffer_resource;
+    std::function<Status(Im2ColBufferResource<T1, max_chunk_size>**)> creator =
+        [](Im2ColBufferResource<T1, max_chunk_size>** resource) {
+          *resource = new Im2ColBufferResource<T1, max_chunk_size>();
+          return Status::OK();
+        };
+    OP_REQUIRES_OK(context, context->resource_manager()->LookupOrCreate(
+                                "Conv2d", "im2col_buffer",
+                                &im2col_buffer_resource, creator));
+    // A single shared resource means multiple ops can't run simultaneously on
+    // different threads; the target platforms focus on intra-op parallelism,
+    // so that shouldn't be an issue. NOTE(review): unref_buffer is destroyed
+    // before lock_buffer, so the Unref happens while the mutex is still held.
+    mutex_lock lock_buffer(im2col_buffer_resource->mu);
+    core::ScopedUnref unref_buffer(im2col_buffer_resource);
+    T1* im2col_buffer = im2col_buffer_resource->data;
+
+    typename TTypes<T1, 4>::ConstTensor input_data = input.tensor<T1, 4>();
+
+    for (int batch = 0; batch < input_batches; ++batch) {
+      for (int out_y = 0; out_y < output_height; ++out_y) {
+        const int in_y_origin = (out_y * stride_rows) - filter_top_offset;
+        for (int out_x = 0; out_x < output_width; ++out_x) {
+          const int in_x_origin = (out_x * stride_cols) - filter_left_offset;
+          const int patch_index = (batch * output_width * output_height) +
+                                  (out_y * output_width) + out_x;
+          const int patch_index_within_chunk = patch_index % patches_per_chunk;
+          T1* im2col_patch_start =
+              im2col_buffer + (patch_index_within_chunk * filter_value_count);
+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
+            const int conv_in_y = in_y_origin + filter_y;
+            float in_y = (conv_in_y - top_padding);
+            if (in_y < 0) {  // Above the resized image: mirror.
+              in_y = -(in_y + 1.0f - pad_offset);
+            } else if (in_y >= resized_height) {  // Below it: mirror back.
+              in_y = (resized_height * 2.0f) - (in_y + 1.0f + pad_offset);
+            }
+            in_y *= st.height_scale;  // Map to a row coordinate in 'input'.
+            const int64 top_y_index = static_cast<int64>(std::floor(in_y));
+            const int64 bottom_y_index = std::min(
+                static_cast<int64>(std::ceil(in_y)), (st.in_height - 1));
+            const T1 y_lerp = in_y - top_y_index;
+            T1* im2col_row_start =
+                im2col_patch_start + (filter_y * filter_width * input_depth);
+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
+              const int conv_in_x = in_x_origin + filter_x;
+              float in_x = (conv_in_x - left_padding);
+              if (in_x < 0) {  // Left of the resized image: mirror.
+                in_x = -(in_x + 1.0f - pad_offset);
+              } else if (in_x >= resized_width) {  // Right of it: mirror back.
+                in_x = (resized_width * 2.0f) - (in_x + 1.0f + pad_offset);
+              }
+              in_x *= st.width_scale;  // Map to a column coordinate in 'input'.
+              const int64 left_x_index = static_cast<int64>(std::floor(in_x));
+              const int64 right_x_index = std::min(
+                  static_cast<int64>(std::ceil(in_x)), (st.in_width - 1));
+              const T1 x_lerp = in_x - left_x_index;
+              T1* im2col_row_pixel =
+                  im2col_row_start + (filter_x * input_depth);
+              for (int in_channel = 0; in_channel < input_depth; ++in_channel) {
+                T1 in_value;
+                if ((conv_in_x >= 0) && (conv_in_x < padded_width) &&
+                    (conv_in_y >= 0) && (conv_in_y < padded_height)) {
+                  const T1 top_left(
+                      input_data(batch, top_y_index, left_x_index, in_channel));
+                  const T1 top_right(input_data(batch, top_y_index,
+                                                right_x_index, in_channel));
+                  const T1 bottom_left(input_data(batch, bottom_y_index,
+                                                  left_x_index, in_channel));
+                  const T1 bottom_right(input_data(batch, bottom_y_index,
+                                                   right_x_index, in_channel));
+                  const T1 top = top_left + (top_right - top_left) * x_lerp;
+                  const T1 bottom =
+                      bottom_left + (bottom_right - bottom_left) * x_lerp;
+                  in_value = top + (bottom - top) * y_lerp;
+                } else {  // Outside the padded image: zero fill.
+                  in_value = T1(0);
+                }
+                im2col_row_pixel[in_channel] = in_value;
+              }
+            }
+          }
+          const bool is_last_in_chunk =
+              (patch_index_within_chunk == (patches_per_chunk - 1));
+          const bool is_last_overall =
+              ((batch == (input_batches - 1)) &&
+               (out_y == (output_height - 1)) && (out_x == (output_width - 1)));
+          if (is_last_in_chunk || is_last_overall) {
+            // Now we've assembled a set of image patches into a matrix, apply a
+            // GEMM matrix multiply of the patches as rows, times the filter
+            // weights in columns, to get partial results in the output matrix.
+            const int how_many_patches = patch_index_within_chunk + 1;
+            const int m = how_many_patches;
+            const int n = filter_count;
+            const int k = filter_value_count;
+            const int lda = filter_value_count;
+            const int ldb = filter_count;
+            const int ldc = filter_count;
+            const size_t start_patch_index =
+                patch_index - (how_many_patches - 1);
+            T3* chunk_output_data =
+                output_data + (start_patch_index * filter_count);
+            TGemmFunctor gemm_functor;
+            gemm_functor(m, n, k, im2col_buffer, lda, filter_data, ldb,
+                         chunk_output_data, ldc);
+          }
+        }
+      }
+    }
+  }
+};
+
+}  // namespace
+
+// Implements a version of convolution with bilinear resizing and mirror padding
+// included; validates inputs, then delegates the computation to TConvFunctor.
+template <class T, class TConvFunctor>
+class FusedResizeConv2DUsingGemmOp : public OpKernel {
+ public:
+  explicit FusedResizeConv2DUsingGemmOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("resize_align_corners", &align_corners_));
+    MirrorPadMode mode;
+    OP_REQUIRES_OK(context, context->GetAttr("mode", &mode));
+    // offset_ encodes the mirror-padding mode: 0 for SYMMETRIC, 1 for REFLECT.
+    switch (mode) {
+      case MirrorPadMode::SYMMETRIC: {
+        offset_ = 0;
+        break;
+      }
+      case MirrorPadMode::REFLECT: {
+        offset_ = 1;
+        break;
+      }
+      default:
+        OP_REQUIRES(context, false,
+                    errors::InvalidArgument(
+                        "mode must be either REFLECT or SYMMETRIC."));
+    }
+    OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
+    OP_REQUIRES(context, strides_.size() == 4,
+                errors::InvalidArgument("Sliding window strides field must "
+                                        "specify 4 dimensions"));
+    const int64 stride_n = GetTensorDim(strides_, FORMAT_NHWC, 'N');
+    const int64 stride_c = GetTensorDim(strides_, FORMAT_NHWC, 'C');
+    OP_REQUIRES(
+        context, stride_n == 1 && stride_c == 1,
+        errors::InvalidArgument("Current implementation does not yet support "
+                                "strides in the batch and depth dimensions."));
+    OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is of the following dimensions:
+    // [ batch, in_rows, in_cols, in_depth ]
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, (input.shape().num_elements() > 0),
+                errors::InvalidArgument("Input tensor can't be empty"));
+    // NOTE(review): input 1 is presumably consumed by 'st' below -- confirm.
+    ImageResizerState st(align_corners_);
+    st.ValidateAndCalculateOutputSize(context, input);
+    if (!context->status().ok()) return;  // Validation reported an error.
+    const TensorShape resized_shape(
+        {input.dim_size(0), st.out_height, st.out_width, input.dim_size(3)});
+
+    const Tensor& paddings = context->input(2);
+
+    const int dims = resized_shape.dims();
+    OP_REQUIRES(
+        context, TensorShapeUtils::IsMatrix(paddings.shape()) &&
+                     paddings.dim_size(1) == 2,
+        errors::InvalidArgument("paddings must be a matrix with 2 columns: ",
+                                paddings.shape().DebugString()));
+    const int fixed_dims =
+        (allow_legacy_scalars() && dims == 0 && paddings.dim_size(0) == 1)
+            ? 1
+            : dims;
+    OP_REQUIRES(
+        context, fixed_dims == paddings.dim_size(0),
+        errors::InvalidArgument(
+            "The first dimension of paddings must be the rank of inputs: ",
+            fixed_dims, " ", paddings.shape().DebugString(), " ",
+            resized_shape.DebugString()));
+    OP_REQUIRES(
+        context, dims == paddings.dim_size(0),
+        errors::InvalidArgument(
+            "The first dimension of paddings must be the rank of inputs: ",
+            dims, " ", paddings.shape().DebugString(), " ",
+            resized_shape.DebugString()));
+    // resized_shape is built from four sizes above, so dims is always 4 here.
+    OP_REQUIRES(
+        context, dims == 4,
+        errors::InvalidArgument(
+            "Fused mirror padding only supports four-dimensional inputs, but ",
+            dims, " requested"));
+
+    // Validate paddings and compute the padded shape of the resized input.
+    TensorShape padded_shape;
+    TTypes<int32>::ConstMatrix paddings_matrix = paddings.matrix<int32>();
+    for (int d = 0; d < dims; ++d) {
+      const int32 before =
+          paddings_matrix(d, 0);  // Pad before existing elements.
+      const int32 after =
+          paddings_matrix(d, 1);  // Pad after existing elements.
+      OP_REQUIRES(context, before >= 0 && after >= 0,
+                  errors::InvalidArgument("paddings must be non-negative: ",
+                                          before, " ", after));
+      if (offset_ == 0) {  // SYMMETRIC mode.
+        OP_REQUIRES(
+            context, before <= resized_shape.dim_size(d) &&
+                         after <= resized_shape.dim_size(d),
+            errors::InvalidArgument("paddings must be no greater "
+                                    "than the dimension size: ",
+                                    before, ", ", after, " greater than ",
+                                    resized_shape.dim_size(d)));
+      } else if (offset_ == 1) {  // REFLECT mode.
+        OP_REQUIRES(
+            context, before < resized_shape.dim_size(d) &&
+                         after < resized_shape.dim_size(d),
+            errors::InvalidArgument("paddings must be less than"
+                                    " the dimension size: ",
+                                    before, ", ", after, " not less than ",
+                                    resized_shape.dim_size(d)));
+      }
+      padded_shape.AddDim(before + resized_shape.dim_size(d) + after);
+    }
+
+    OP_REQUIRES(
+        context, ((paddings_matrix(0, 0) == 0) && (paddings_matrix(0, 1) == 0)),
+        errors::InvalidArgument(
+            "Fused mirror padding only support spatial padding, not batches: ",
+            paddings.DebugString()));
+    OP_REQUIRES(
+        context, ((paddings_matrix(3, 0) == 0) && (paddings_matrix(3, 1) == 0)),
+        errors::InvalidArgument(
+            "Fused mirror padding only support spatial padding, not channels: ",
+            paddings.DebugString()));
+    const int32 top_padding = paddings_matrix(1, 0);
+    const int32 bottom_padding = paddings_matrix(1, 1);
+    const int32 left_padding = paddings_matrix(2, 0);
+    const int32 right_padding = paddings_matrix(2, 1);
+
+    // Input filter is of the following dimensions:
+    // [ filter_rows, filter_cols, in_depth, out_depth]
+    const Tensor& filter = context->input(3);
+
+    // For 2D convolution, there should be 4 dimensions.
+    OP_REQUIRES(context, padded_shape.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        padded_shape.DebugString()));
+    OP_REQUIRES(context, filter.dims() == 4,
+                errors::InvalidArgument("filter must be 4-dimensional: ",
+                                        filter.shape().DebugString()));
+
+    // We only check the first three dims, since the depth is accessed as an
+    // int64 below.
+    for (int i = 0; i < 3; i++) {
+      OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i),
+                                           std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("filter too large"));
+    }
+
+    // The last dimension for input is in_depth. It must be the same as the
+    // filter's in_depth.
+    const int64 in_depth = padded_shape.dim_size(3);
+    OP_REQUIRES(
+        context, in_depth == filter.dim_size(2),
+        errors::InvalidArgument("input and filter must have the same depth: ",
+                                in_depth, " vs ", filter.dim_size(2)));
+
+    // The last dimension for filter is out_depth.
+    const int out_depth = static_cast<int>(filter.dim_size(3));
+
+    // The second dimension for input is rows/height.
+    // The first dimension for filter is rows/height.
+    const int64 padded_rows_raw = padded_shape.dim_size(1);
+    OP_REQUIRES(context, FastBoundsCheck(padded_rows_raw,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("Input rows too large"));
+    const int padded_rows = static_cast<int>(padded_rows_raw);
+    const int filter_rows = static_cast<int>(filter.dim_size(0));
+    const int resized_rows = static_cast<int>(resized_shape.dim_size(1));
+
+    // The third dimension for input is columns/width.
+    // The second dimension for filter is columns/width.
+    const int64 padded_cols_raw = padded_shape.dim_size(2);
+    OP_REQUIRES(context, FastBoundsCheck(padded_cols_raw,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("Input cols too large"));
+    const int padded_cols = static_cast<int>(padded_cols_raw);
+    const int filter_cols = static_cast<int>(filter.dim_size(1));
+    const int resized_cols = static_cast<int>(resized_shape.dim_size(2));
+
+    // The first dimension for input is batch.
+    const int64 batch_raw = padded_shape.dim_size(0);
+    OP_REQUIRES(context,
+                FastBoundsCheck(batch_raw, std::numeric_limits<int>::max()),
+                errors::InvalidArgument("batch is too large"));
+    const int batch = static_cast<int>(batch_raw);
+
+    // For now we take the stride from the second and third dimensions only (we
+    // do not support striding on the batch or depth dimension).
+    const int stride_rows = GetTensorDim(strides_, FORMAT_NHWC, 'H');
+    const int stride_cols = GetTensorDim(strides_, FORMAT_NHWC, 'W');
+
+    int64 out_rows = 0, out_cols = 0, pad_rows = 0, pad_cols = 0;
+    OP_REQUIRES_OK(context,
+                   GetWindowedOutputSize(padded_rows, filter_rows, stride_rows,
+                                         padding_, &out_rows, &pad_rows));
+    OP_REQUIRES_OK(context,
+                   GetWindowedOutputSize(padded_cols, filter_cols, stride_cols,
+                                         padding_, &out_cols, &pad_cols));
+    TensorShape out_shape =
+        ShapeFromFormat(FORMAT_NHWC, batch, out_rows, out_cols, out_depth);
+    OP_REQUIRES(context, (out_shape.num_elements() > 0),
+                errors::InvalidArgument("Output tensor can't be empty"));
+
+    // Output tensor is of the following dimensions:
+    // [ in_batch, out_rows, out_cols, out_depth ]
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    VLOG(2) << "Conv2D: in_depth = " << in_depth
+            << ", padded_cols = " << padded_cols
+            << ", filter_cols = " << filter_cols
+            << ", padded_rows = " << padded_rows
+            << ", filter_rows = " << filter_rows
+            << ", stride_rows = " << stride_rows
+            << ", stride_cols = " << stride_cols
+            << ", out_depth = " << out_depth;
+
+    // Unreachable in practice: an empty out_shape was already rejected above.
+    if (out_shape.num_elements() == 0) {
+      return;
+    }
+    TConvFunctor conv_functor;
+    conv_functor(context, input, batch, resized_rows, resized_cols, padded_rows,
+                 padded_cols, in_depth, filter.flat<T>().data(), filter_rows,
+                 filter_cols, out_depth, stride_rows, stride_cols, padding_,
+                 output->flat<T>().data(), out_rows, out_cols, st, top_padding,
+                 bottom_padding, left_padding, right_padding, offset_);
+  }
+
+ private:
+  std::vector<int32> strides_;  // "strides" attr; exactly 4 entries.
+  Padding padding_;             // "padding" attr.
+  bool align_corners_;          // "resize_align_corners" attr.
+  int offset_;                  // 0 for SYMMETRIC, 1 for REFLECT.
+
+  TF_DISALLOW_COPY_AND_ASSIGN(FusedResizeConv2DUsingGemmOp);
+};
+
+#define REGISTER_FUSED(T)             \
+  REGISTER_KERNEL_BUILDER(            \
+      Name("FusedResizeAndPadConv2D") \
+          .Device(DEVICE_CPU)         \
+          .TypeConstraint<T>("T"),    \
+      FusedResizeConv2DUsingGemmOp<   \
+          T,                          \
+          FusedResizeAndPadConvFunctor<T, T, T, FastGemmFunctor<T, T, T>>>);
+// Register the CPU kernel; float is the only type instantiated here.
+TF_CALL_float(REGISTER_FUSED);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
new file mode 100644
index 00000000000..228f2d5defa
--- /dev/null
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -0,0 +1,240 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/image_ops.h"
+#include "tensorflow/cc/ops/nn_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/public/session.h"
+
+namespace tensorflow {
+
+class FusedResizePadConvOpTest : public OpsTestBase {
+ protected:
+  void HandwrittenConv() {
+    const int stride = 1;
+    TF_EXPECT_OK(NodeDefBuilder("fused_resize_op", "FusedResizeAndPadConv2D")
+                     .Input(FakeInput(DT_FLOAT))
+                     .Input(FakeInput(DT_INT32))
+                     .Input(FakeInput(DT_INT32))
+                     .Input(FakeInput(DT_FLOAT))
+                     .Attr("T", DT_FLOAT)
+                     .Attr("resize_align_corners", false)
+                     .Attr("mode", "REFLECT")
+                     .Attr("strides", {1, stride, stride, 1})
+                     .Attr("padding", "SAME")
+                     .Finalize(node_def()));
+    TF_EXPECT_OK(InitOp());
+    const int depth = 1;
+    const int image_width = 4;
+    const int image_height = 3;
+    const int image_batch_count = 1;
+    // The image matrix is:
+    // |  1 |  2 |  3 |  4 |
+    // |  5 |  6 |  7 |  8 |
+    // |  9 | 10 | 11 | 12 |
+    Tensor image(DT_FLOAT,
+                 {image_batch_count, image_height, image_width, depth});
+    test::FillValues<float>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
+
+    // The filter matrix is:
+    // | 1 | 4 | 7 |
+    // | 2 | 5 | 8 |
+    // | 3 | 6 | 9 |
+    const int filter_size = 3;
+    const int filter_count = 1;
+    Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
+    test::FillValues<float>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});
+
+    const int resized_width = image_width;
+    const int resized_height = image_height;
+
+    const int top_padding = 0;
+    const int bottom_padding = 0;
+    const int left_padding = 0;
+    const int right_padding = 0;
+
+    AddInputFromArray<float>(image.shape(), image.flat<float>());
+    AddInputFromArray<int32>(TensorShape({2}), {resized_height, resized_width});
+    AddInputFromArray<int32>(
+        TensorShape({4, 2}),
+        {0, 0, top_padding, bottom_padding, left_padding, right_padding, 0, 0});
+    AddInputFromArray<float>(filter.shape(), filter.flat<float>());
+    TF_ASSERT_OK(RunOpKernel());
+
+    // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
+    // the input set to zero because we're using the 'SAME' padding mode.
+    // The calculations behind the expected output are:
+    // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
+    // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
+    // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
+    // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
+    // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
+    // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
+    // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
+    // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
+    // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
+    // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
+    // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
+    // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
+    // This means we should end up with this matrix:
+    // |  105  |  150  |  183  |   95  |
+    // |  235  |  312  |  357  |  178  |
+    // |  187  |  234  |  261  |  121  |
+    const int expected_width = image_width;
+    const int expected_height = image_height * filter_count;
+    Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
+                                           expected_width, filter_count}));
+    test::FillValues<float>(
+        &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
+    const Tensor& output = *GetOutput(0);
+    test::ExpectTensorNear<float>(expected, output, 1e-5);
+  }
+
+  void CompareFusedAndSeparate(int input_width, int input_height,
+                               int input_depth, int resize_width,
+                               int resize_height, int y_padding, int x_padding,
+                               int filter_size, int filter_count,
+                               bool resize_align_corners, string pad_mode,
+                               int stride, string padding) {
+    auto root = tensorflow::Scope::NewRootScope();
+    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
+
+    const size_t input_data_size = input_height * input_width * input_depth;
+    Tensor input_data(DT_FLOAT,
+                      TensorShape({1, input_height, input_width, input_depth}));
+    for (int i = 0; i < input_data_size; ++i) {
+      input_data.flat<float>()(i) = i + 1.0f;
+    }
+    Output input =
+        Const(root.WithOpName("input"), Input::Initializer(input_data));
+
+    const size_t filter_data_size =
+        filter_size * filter_size * filter_count * input_depth;
+    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
+                                              input_depth, filter_count}));
+    for (int i = 0; i < filter_data_size; ++i) {
+      filter_data.flat<float>()(i) = i + 1.0f;
+    }
+    Output filter =
+        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
+
+    Output resize_size =
+        Const(root.WithOpName("resize_size"), {resize_height, resize_width});
+    Output resize =
+        ResizeBilinear(root.WithOpName("resize"), input, resize_size,
+                       ResizeBilinear::AlignCorners(resize_align_corners));
+    Output paddings =
+        Const(root.WithOpName("paddings"),
+              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
+    Output mirror_pad =
+        MirrorPad(root.WithOpName("mirror_pad"), resize, paddings, pad_mode);
+    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, filter,
+                         {1, stride, stride, 1}, padding);
+
+    Output fused_conv = FusedResizeAndPadConv2D(
+        root.WithOpName("fused_conv"), input, resize_size, paddings, filter,
+        pad_mode, {1, stride, stride, 1}, padding,
+        FusedResizeAndPadConv2D::ResizeAlignCorners(resize_align_corners));
+
+    tensorflow::GraphDef graph;
+    TF_ASSERT_OK(root.ToGraphDef(&graph));
+
+    std::unique_ptr<tensorflow::Session> session(
+        tensorflow::NewSession(tensorflow::SessionOptions()));
+    TF_ASSERT_OK(session->Create(graph));
+
+    std::vector<Tensor> unfused_tensors;
+    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));
+
+    std::vector<Tensor> fused_tensors;
+    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));
+
+    test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5);
+  }
+};
+
+TEST_F(FusedResizePadConvOpTest, HandwrittenConv) { HandwrittenConv(); }
+
+TEST_F(FusedResizePadConvOpTest, IdentityComparative) {
+  CompareFusedAndSeparate(10, 10, 1, 10, 10, 0, 0, 1, 1, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ConvOnlyComparative) {
+  CompareFusedAndSeparate(10, 10, 3, 10, 10, 0, 0, 4, 4, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeOnlyComparative) {
+  CompareFusedAndSeparate(10, 10, 1, 20, 20, 0, 0, 1, 1, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeAndConvComparative) {
+  CompareFusedAndSeparate(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvComparative) {
+  CompareFusedAndSeparate(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeAndConvStridedComparative) {
+  CompareFusedAndSeparate(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 2,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvValidComparative) {
+  CompareFusedAndSeparate(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
+                          "VALID");
+}
+
+TEST_F(FusedResizePadConvOpTest, PadOnlyComparative) {
+  CompareFusedAndSeparate(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, PadOnlyWithChannelsComparative) {
+  CompareFusedAndSeparate(4, 4, 3, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeAndPadComparative) {
+  CompareFusedAndSeparate(4, 4, 1, 6, 6, 2, 2, 1, 1, false, "REFLECT", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, PadOnlySymmetricComparative) {
+  CompareFusedAndSeparate(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "SYMMETRIC", 1,
+                          "SAME");
+}
+
+TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparative) {
+  CompareFusedAndSeparate(4, 4, 3, 6, 6, 2, 2, 1, 1, false, "SYMMETRIC", 1,
+                          "SAME");
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_using_gemm.cc b/tensorflow/core/kernels/conv_ops_using_gemm.cc
index c39510a11a2..6da6da846b4 100644
--- a/tensorflow/core/kernels/conv_ops_using_gemm.cc
+++ b/tensorflow/core/kernels/conv_ops_using_gemm.cc
@@ -56,14 +56,13 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/tensor_slice.h"
 #include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/conv_ops.h"
+#include "tensorflow/core/kernels/gemm_functors.h"
+#include "tensorflow/core/kernels/image_resizer_state.h"
+#include "tensorflow/core/util/mirror_pad_mode.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 
-#if defined(__APPLE__)
-#include <Accelerate/Accelerate.h>
-#define USE_ACCELERATE_GEMM
-#endif  // __APPLE__
-
 namespace tensorflow {
 
 namespace {
@@ -189,87 +188,6 @@ class ReferenceConvFunctor {
   }
 };
 
-// A readable but slow implementation of matrix multiplication, useful for
-// debugging and understanding the algorithm. Use instead of FastGemmFunctor in
-// the Im2ColConvFunctor template definition inside the op registration to
-// enable. Assumes row-major ordering of the values in memory.
-template <class T1, class T2, class T3>
-class ReferenceGemmFunctor {
- public:
-  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
-                  const T2* b, size_t ldb, T3* c, size_t ldc) {
-    const size_t a_i_stride = lda;
-    const size_t a_l_stride = 1;
-    const size_t b_j_stride = 1;
-    const size_t b_l_stride = ldb;
-    const size_t c_i_stride = ldc;
-    const size_t c_j_stride = 1;
-    size_t i, j, l;
-    for (j = 0; j < n; j++) {
-      for (i = 0; i < m; i++) {
-        T3 total(0);
-        for (l = 0; l < k; l++) {
-          const size_t a_index = ((i * a_i_stride) + (l * a_l_stride));
-          const T1 a_value = a[a_index];
-          const size_t b_index = ((j * b_j_stride) + (l * b_l_stride));
-          const T2 b_value = b[b_index];
-          total += (a_value * b_value);
-        }
-        const size_t c_index = ((i * c_i_stride) + (j * c_j_stride));
-        c[c_index] = total;
-      }
-    }
-  }
-};
-
-// Uses the optimized Eigen library to implement the matrix multiplication
-// required by the Im2ColConvFunctor class. We supply the two input and one
-// output types so that the accumulator can potentially be higher-precision than
-// the inputs, even though we don't currently take advantage of this.
-template <class T1, class T2, class T3>
-class FastGemmFunctor {
- public:
-  // Convenience wrappers for the Eigen matrix types we'll be using.
-  typedef Eigen::Map<
-      const Eigen::Matrix<T1, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-      ConstMatrixT1;
-  typedef Eigen::Map<
-      const Eigen::Matrix<T2, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-      ConstMatrixT2;
-  typedef Eigen::Map<
-      Eigen::Matrix<T3, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-      MatrixT3;
-  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
-                  const T2* b, size_t ldb, T3* c, size_t ldc) {
-    ConstMatrixT1 a_matrix(a, m, k);
-    ConstMatrixT2 b_matrix(b, k, n);
-    MatrixT3 c_matrix(c, m, n);
-    c_matrix.noalias() = a_matrix * b_matrix;
-  }
-};
-
-// If we have Apple's Accelerate framework, use their implementation of GEMM to
-// get a performance boost for float.
-#if defined(USE_ACCELERATE_GEMM)
-template <>
-class FastGemmFunctor<float, float, float> {
- public:
-  void operator()(size_t m, size_t n, size_t k, const float* a, size_t lda,
-                  const float* b, size_t ldb, float* c, size_t ldc) {
-    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0f, a,
-                lda, b, ldb, 0.0f, c, ldc);
-  }
-};
-#endif  // USE_ACCELERATE_GEMM
-
-// Used to keep track of persistent memory buffers used within the op.
-template <class T, size_t size>
-struct Im2ColBufferResource : public ResourceBase {
-  mutex mu;
-  T data[size];
-  string DebugString() { return "Im2ColBufferResource"; }
-};
-
 // Implements convolution as a two stage process, first packing the patches of
 // the input image into columns (im2col) and then running GEMM to produce the
 // final result.
@@ -344,7 +262,6 @@ class Im2ColConvFunctor {
                 errors::InvalidArgument("Im2Col patch too large for buffer"));
     const size_t patches_per_chunk =
         max_chunk_size / (filter_value_count * sizeof(T1));
-
     // Because memory allocation is very expensive on mobile platforms, try to
     // allocate a persistent buffer that will be kept around between calls. We
     // use TensorFlow's resource management to ensure that the memory will be
diff --git a/tensorflow/core/kernels/cwise_ops_gradients.h b/tensorflow/core/kernels/cwise_ops_gradients.h
index 47d5410d0aa..cd233054771 100644
--- a/tensorflow/core/kernels/cwise_ops_gradients.h
+++ b/tensorflow/core/kernels/cwise_ops_gradients.h
@@ -99,13 +99,15 @@ struct scalar_sqrt_gradient_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_gradient_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T
   operator()(const T& output, const T& output_gradient) const {
-    return static_cast<T>(0.5) * output_gradient / output;
+    const T out_conj = numext::conj(output);
+    return static_cast<T>(0.5) * output_gradient / out_conj;
   }
   template <typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet
   packetOp(const Packet& output, const Packet& output_gradient) const {
     const Packet const_half = pset1<Packet>(static_cast<T>(0.5));
-    return pdiv(pmul(const_half, output_gradient), output);
+    const Packet out_conj = pconj(output);
+    return pdiv(pmul(const_half, output_gradient), out_conj);
   }
 };
 template <typename T>
@@ -123,15 +125,17 @@ struct scalar_rsqrt_gradient_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_rsqrt_gradient_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T
   operator()(const T& output, const T& output_gradient) const {
-    return static_cast<T>(-0.5) * (output_gradient * output) *
-           (output * output);
+    const T out_conj = numext::conj(output);
+    return static_cast<T>(-0.5) * (output_gradient * out_conj) *
+           (out_conj * out_conj);
   }
   template <typename Packet>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet
   packetOp(const Packet& output, const Packet& output_gradient) const {
     const Packet const_half = pset1<Packet>(static_cast<T>(-0.5));
-    return pmul(const_half,
-                pmul(pmul(output_gradient, output), pmul(output, output)));
+    const Packet out_conj = pconj(output);
+    return pmul(const_half, pmul(pmul(output_gradient, out_conj),
+                                 pmul(out_conj, out_conj)));
   }
 };
 template <typename T>
diff --git a/tensorflow/core/kernels/draw_bounding_box_op.cc b/tensorflow/core/kernels/draw_bounding_box_op.cc
index 4b5688c11f8..a8818b7385d 100644
--- a/tensorflow/core/kernels/draw_bounding_box_op.cc
+++ b/tensorflow/core/kernels/draw_bounding_box_op.cc
@@ -35,21 +35,49 @@ class DrawBoundingBoxesOp : public OpKernel {
   void Compute(OpKernelContext* context) override {
     const Tensor& images = context->input(0);
     const Tensor& boxes = context->input(1);
 
     OP_REQUIRES(context, images.dims() == 4,
                 errors::InvalidArgument("The rank of the images should be 4"));
     OP_REQUIRES(
         context, boxes.dims() == 3,
         errors::InvalidArgument("The rank of the boxes tensor should be 3"));
-
     OP_REQUIRES(context, images.dim_size(0) == boxes.dim_size(0),
                 errors::InvalidArgument("The batch sizes should be the same"));
 
+    const int64 depth = images.dim_size(3);  // Rank verified above.
+    OP_REQUIRES(
+        context, depth == 4 || depth == 1 || depth == 3,
+        errors::InvalidArgument("Channel depth should be either 1 (GRY), "
+                                "3 (RGB), or 4 (RGBA)"));
+
     const int64 batch_size = images.dim_size(0);
     const int64 height = images.dim_size(1);
     const int64 width = images.dim_size(2);
-    const int64 depth = images.dim_size(3);
+    const int64 color_table_length = 10;
 
+    // 0: yellow
+    // 1: blue
+    // 2: red
+    // 3: lime
+    // 4: purple
+    // 5: olive
+    // 6: maroon
+    // 7: navy blue
+    // 8: aqua
+    // 9: fuchsia
+    float color_table[color_table_length][4] = {
+        {1, 1, 0, 1},     {0, 0, 1, 1},     {1, 0, 0, 1},   {0, 1, 0, 1},
+        {0.5, 0, 0.5, 1}, {0.5, 0.5, 0, 1}, {0.5, 0, 0, 1}, {0, 0, 0.5, 1},
+        {0, 1, 1, 1},     {1, 0, 1, 1},
+    };
+
+    // Reset first color channel to 1 if image is GRY.
+    // For GRY images, this means all bounding boxes will be white.
+    if (depth == 1) {
+      for (int64 i = 0; i < color_table_length; i++) {
+        color_table[i][0] = 1;
+      }
+    }
     Tensor* output;
     OP_REQUIRES_OK(
         context,
@@ -62,8 +90,8 @@ class DrawBoundingBoxesOp : public OpKernel {
     for (int64 b = 0; b < batch_size; ++b) {
       const int64 num_boxes = boxes.dim_size(1);
       const auto tboxes = boxes.tensor<T, 3>();
-
       for (int64 bb = 0; bb < num_boxes; ++bb) {
+        int64 color_index = bb % color_table_length;
         const int64 min_box_row =
             static_cast<float>(tboxes(b, bb, 0)) * (height - 1);
         const int64 min_box_row_clamp =
@@ -122,22 +150,34 @@ class DrawBoundingBoxesOp : public OpKernel {
         // Draw top line.
         if (min_box_row >= 0) {
           for (int64 j = min_box_col_clamp; j <= max_box_col_clamp; ++j)
-            canvas(b, min_box_row, j, 0) = Eigen::NumTraits<T>::quiet_NaN();
+            for (int64 c = 0; c < depth; c++) {
+              canvas(b, min_box_row, j, c) =
+                  static_cast<T>(color_table[color_index][c]);
+            }
         }
         // Draw bottom line.
         if (max_box_row < height) {
           for (int64 j = min_box_col_clamp; j <= max_box_col_clamp; ++j)
-            canvas(b, max_box_row, j, 0) = Eigen::NumTraits<T>::quiet_NaN();
+            for (int64 c = 0; c < depth; c++) {
+              canvas(b, max_box_row, j, c) =
+                  static_cast<T>(color_table[color_index][c]);
+            }
         }
         // Draw left line.
         if (min_box_col >= 0) {
           for (int64 i = min_box_row_clamp; i <= max_box_row_clamp; ++i)
-            canvas(b, i, min_box_col, 0) = Eigen::NumTraits<T>::quiet_NaN();
+            for (int64 c = 0; c < depth; c++) {
+              canvas(b, i, min_box_col, c) =
+                  static_cast<T>(color_table[color_index][c]);
+            }
         }
         // Draw right line.
         if (max_box_col < width) {
           for (int64 i = min_box_row_clamp; i <= max_box_row_clamp; ++i)
-            canvas(b, i, max_box_col, 0) = Eigen::NumTraits<T>::quiet_NaN();
+            for (int64 c = 0; c < depth; c++) {
+              canvas(b, i, max_box_col, c) =
+                  static_cast<T>(color_table[color_index][c]);
+            }
         }
       }
     }
diff --git a/tensorflow/core/kernels/gather_nd_op.cc b/tensorflow/core/kernels/gather_nd_op.cc
index c2a5192efb1..73f30cdae37 100644
--- a/tensorflow/core/kernels/gather_nd_op.cc
+++ b/tensorflow/core/kernels/gather_nd_op.cc
@@ -53,7 +53,7 @@ class GatherNdOp : public OpKernel {
             "index innermost dimension length must be <= params rank; saw: ",
             indices.dim_size(indices.dims() - 1), " vs. ", params.dims()));
 
-    TensorShape indices_shape(indices.shape());
+    const TensorShape& indices_shape(indices.shape());
     const int64 indices_nd = indices_shape.dim_size(indices_shape.dims() - 1);
 
     // Check that we have enough index space
@@ -79,7 +79,7 @@ class GatherNdOp : public OpKernel {
       N_result *= indices_shape.dim_size(i);
     }
 
-    TensorShape params_shape(params.shape());
+    const TensorShape& params_shape(params.shape());
     Index total_nd = params_shape.dims();
 
     TensorShape result_shape(indices_shape);
diff --git a/tensorflow/core/kernels/gemm_functors.h b/tensorflow/core/kernels/gemm_functors.h
new file mode 100644
index 00000000000..d37008d5cfb
--- /dev/null
+++ b/tensorflow/core/kernels/gemm_functors.h
@@ -0,0 +1,117 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is a set of different implementations for the basic matrix by matrix
+// multiply function, commonly known as GEMM after the BLAS library's naming.
+// Having a standard interface enables us to swap out implementations on
+// different platforms, to make sure we're using the optimal version. They are
+// implemented as C++ template functors, so they're easy to swap into all of the
+// different kernels that use them.
+
+#ifndef TENSORFLOW_CORE_KERNELS_GEMM_FUNCTORS_H_
+#define TENSORFLOW_CORE_KERNELS_GEMM_FUNCTORS_H_
+
+#include <string.h>
+#include <map>
+#include <vector>
+
+#include "tensorflow/core/framework/tensor.h"
+
+#if defined(__APPLE__) && defined(USE_GEMM_FOR_CONV)
+#include <Accelerate/Accelerate.h>
+#define USE_ACCELERATE_GEMM
+#endif  // __APPLE__ && USE_GEMM_FOR_CONV
+
+// A readable but slow implementation of matrix multiplication, useful for
+// debugging and understanding the algorithm. Use instead of FastGemmFunctor in
+// the Im2ColConvFunctor template definition inside the op registration to
+// enable. Assumes row-major ordering of the values in memory.
+//
+// Computes c = a * b, where a is m x k, b is k x n and c is m x n. lda, ldb
+// and ldc are the distances, in elements, between the starts of consecutive
+// rows of a, b and c respectively.
+template <class T1, class T2, class T3>
+class ReferenceGemmFunctor {
+ public:
+  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
+                  const T2* b, size_t ldb, T3* c, size_t ldc) {
+    const size_t a_i_stride = lda;
+    const size_t a_l_stride = 1;
+    const size_t b_j_stride = 1;
+    const size_t b_l_stride = ldb;
+    const size_t c_i_stride = ldc;
+    const size_t c_j_stride = 1;
+    size_t i, j, l;
+    for (j = 0; j < n; j++) {
+      for (i = 0; i < m; i++) {
+        T3 total(0);
+        for (l = 0; l < k; l++) {
+          const size_t a_index = ((i * a_i_stride) + (l * a_l_stride));
+          const T1 a_value = a[a_index];
+          const size_t b_index = ((j * b_j_stride) + (l * b_l_stride));
+          const T2 b_value = b[b_index];
+          total += (a_value * b_value);
+        }
+        const size_t c_index = ((i * c_i_stride) + (j * c_j_stride));
+        c[c_index] = total;
+      }
+    }
+  }
+};
+
+// Uses the optimized Eigen library to implement the matrix multiplication
+// required by the Im2ColConvFunctor class. We supply the two input and one
+// output types so that the accumulator can potentially be higher-precision than
+// the inputs, even though we don't currently take advantage of this.
+//
+// NOTE: lda, ldb and ldc are ignored here; a, b and c are mapped as densely
+// packed row-major matrices of sizes m x k, k x n and m x n.
+template <class T1, class T2, class T3>
+class FastGemmFunctor {
+ public:
+  // Convenience wrappers for the Eigen matrix types we'll be using.
+  typedef Eigen::Map<
+      const Eigen::Matrix<T1, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
+      ConstMatrixT1;
+  typedef Eigen::Map<
+      const Eigen::Matrix<T2, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
+      ConstMatrixT2;
+  typedef Eigen::Map<
+      Eigen::Matrix<T3, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
+      MatrixT3;
+  void operator()(size_t m, size_t n, size_t k, const T1* a, size_t lda,
+                  const T2* b, size_t ldb, T3* c, size_t ldc) {
+    ConstMatrixT1 a_matrix(a, m, k);
+    ConstMatrixT2 b_matrix(b, k, n);
+    MatrixT3 c_matrix(c, m, n);
+    c_matrix.noalias() = a_matrix * b_matrix;
+  }
+};
+
+// If we have Apple's Accelerate framework, use their implementation of GEMM to
+// get a performance boost for float.
+#if defined(USE_ACCELERATE_GEMM)
+template <>
+class FastGemmFunctor<float, float, float> {
+ public:
+  void operator()(size_t m, size_t n, size_t k, const float* a, size_t lda,
+                  const float* b, size_t ldb, float* c, size_t ldc) {
+    cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0f, a,
+                lda, b, ldb, 0.0f, c, ldc);
+  }
+};
+#endif  // USE_ACCELERATE_GEMM
+
+#endif  // TENSORFLOW_CORE_KERNELS_GEMM_FUNCTORS_H_
diff --git a/tensorflow/core/kernels/image_resizer_state.h b/tensorflow/core/kernels/image_resizer_state.h
index a7acb5e649b..8870937422a 100644
--- a/tensorflow/core/kernels/image_resizer_state.h
+++ b/tensorflow/core/kernels/image_resizer_state.h
@@ -49,12 +49,13 @@ struct ImageResizerState {
   explicit ImageResizerState(bool align_corners)
       : align_corners_(align_corners) {}
 
-  // ValidateAndCreateOutput checks the bounds on the input tensors
+  // ValidateAndCalculateOutputSize checks the bounds on the input tensors
   // and requested size, sets up some of the resizing state such as the
-  // height_scale and width_scale, and allocates the output.
+  // height_scale and width_scale, and calculates the output size.
   // If any of these operations fails, it sets an error status in
   // the context, which the caller must check.
-  void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) {
+  void ValidateAndCalculateOutputSize(OpKernelContext* context,
+                                      const Tensor& input) {
     OP_REQUIRES(context, input.dims() == 4,
                 errors::InvalidArgument("input must be 4-dimensional",
                                         input.shape().DebugString()));
@@ -87,12 +88,18 @@ struct ImageResizerState {
     OP_REQUIRES(
         context, input.dim_size(1) > 0 && input.dim_size(2) > 0,
         errors::InvalidArgument("input image must be of non-zero size"));
+    height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
+    width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
+  }
+
+  // Calculates all the required variables, and allocates the output.
+  void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) {
+    ValidateAndCalculateOutputSize(context, input);
+    if (!context->status().ok()) return;
     OP_REQUIRES_OK(context, context->allocate_output(
                                 0, TensorShape({input.dim_size(0), out_height,
                                                 out_width, input.dim_size(3)}),
                                 &output));
-    height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
-    width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
   }
 
   int64 batch_size;
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index 97e2bfcad54..27888d3a313 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -272,7 +272,7 @@ class MaxPoolingGradOp : public OpKernel {
     OP_REQUIRES(context, out_backprop.dims() == 4,
                 errors::InvalidArgument("out_backprop must be 4-dimensional"));
 
-    TensorShape output_shape = tensor_in.shape();
+    const TensorShape& output_shape = tensor_in.shape();
 
     Tensor tensor_out_dup;
     OP_REQUIRES_OK(context,
diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h
index eae5187896e..3baae914cbf 100644
--- a/tensorflow/core/kernels/ops_testutil.h
+++ b/tensorflow/core/kernels/ops_testutil.h
@@ -185,6 +185,7 @@ class OpsTestBase : public ::testing::Test {
     test::SetOutputAttrs(params_.get(), &attrs);
     checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_wrapper;
     params_.get()->slice_reader_cache = &slice_reader_cache_wrapper;
+    params_.get()->resource_manager = device_.get()->resource_manager();
 
     context_.reset(new OpKernelContext(params_.get()));
     device_->Compute(kernel_.get(), context_.get());
diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc
index 0aacf9df9d4..0a1de111627 100644
--- a/tensorflow/core/kernels/random_op.cc
+++ b/tensorflow/core/kernels/random_op.cc
@@ -34,6 +34,16 @@ limitations under the License.
 #include "tensorflow/core/util/guarded_philox_random.h"
 #include "tensorflow/core/util/work_sharder.h"
 
+#if EIGEN_COMP_GNUC && __cplusplus > 199711L
+#define DISABLE_FLOAT_EQUALITY_WARNING \
+  _Pragma("GCC diagnostic push")       \
+      _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop")
+#else
+#define DISABLE_FLOAT_EQUALITY_WARNING
+#define ENABLE_FLOAT_EQUALITY_WARNING
+#endif
+
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -355,47 +365,23 @@ class RandomGammaOp : public OpKernel {
         // Several calculations can be done on a per-alpha basis.
         const double alpha = static_cast<double>(alpha_flat[alpha_idx]);
 
-        if (alpha < 0.3) {
-          // For very small alpha, we use the log-space algorithm proposed in
-          // "Simulating from a gamma distribution with small shape parameter",
-          // http://arxiv.org/abs/1302.1884
-          const double lambda = 1 / alpha - 1;
-          const double w = alpha / (M_E /* exp(1) */ * (1 - alpha));
-          const double r = 1 / (1 + w);
-
-          // Compute the rest of the samples for the current alpha value.
+        DISABLE_FLOAT_EQUALITY_WARNING
+        if (alpha == double(1.0)) {
+          ENABLE_FLOAT_EQUALITY_WARNING
+          // Sample from an exponential distribution.
           for (int64 sample_idx = output_idx % num_samples;
                sample_idx < num_samples && output_idx < limit_output;
                sample_idx++, output_idx++) {
-            // Since each sample may use a variable number of normal/uniform
-            // samples, and we want data stable regardless of sharding
+            // As we want data stable regardless of sharding
             // (including eventually on GPU), we skip on a per-sample basis.
             PhiloxRandom gen = rng;
             gen.Skip(kReservedSamplesPerOutput * output_idx);
             short uniform_remaining = 0;
-
-            // Keep trying until we don't reject a sample. In practice, we
-            // expect a low rejection rate.
-            while (true) {
-              UNIFORM(u);
-              double z;
-              if (u <= r) {
-                z = -log(u / r);
-              } else {
-                UNIFORM(v);
-                z = log(v) / lambda;
-              }
-              double eta = z >= 0 ? exp(-z) : w * lambda * exp(lambda * z);
-              UNIFORM(v);
-              double h = exp(-z - exp(-z / alpha));
-              if (h > eta * v) {
-                samples_alpha_offset[sample_idx * num_alphas] =
-                    static_cast<T>(exp(-z / alpha));
-                break;
-              }
-            }     // while: true
-          }       // for: sample_idx
-        } else {  // so, alpha >= 0.3
+            UNIFORM(u);
+            const double res = -log(1.0 - u);
+            samples_alpha_offset[sample_idx * num_alphas] = static_cast<T>(res);
+          }       // for (sample_idx)
+        } else {  // if alpha != 1.0
           // Transformation-rejection from pairs of uniform and normal random
           // variables. http://dl.acm.org/citation.cfm?id=358414
           //
@@ -454,7 +440,7 @@ class RandomGammaOp : public OpKernel {
               }
             }  // while: true
           }    // for: sample_idx
-        }      // if: alpha < 0.3
+        }      // if (alpha == 1.0)
       }        // for: output_idx
     };         // DoWork
 #undef UNIFORM
@@ -463,9 +449,7 @@ class RandomGammaOp : public OpKernel {
     // Other ops: sqrt, +, *, /, %... something like 15 of these, at 3-6 cycles
     // each = ~60.
     // All of this /0.95 due to the rejection possibility = ~85.
-    // All of this * ~2 to incorporate possibility of the log/exp branch for
-    // low-alpha. (1 log, 4 exp, 3/, 3*)
-    static const int kElementCost = 170 + 2 * Normal::kElementCost +
+    static const int kElementCost = 85 + 2 * Normal::kElementCost +
                                     Uniform::kElementCost +
                                     3 * PhiloxRandom::kElementCost;
     auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
diff --git a/tensorflow/core/kernels/restore_op_test.cc b/tensorflow/core/kernels/restore_op_test.cc
index 4774a06c44a..16bc31be27a 100644
--- a/tensorflow/core/kernels/restore_op_test.cc
+++ b/tensorflow/core/kernels/restore_op_test.cc
@@ -62,10 +62,10 @@ Tensor MakeInput(const TensorShape& shape,
 TEST_F(RestoreOpTest, RestoreSimple) {
   const string filename = io::JoinPath(testing::TmpDir(), "tensor_simple");
   const std::vector<string> tensor_names = {
-      "tensor_bool",  "tensor_int",    "tensor_float",  "tensor_double",
-      "tensor_qint8", "tensor_qint32", "tensor_uint8",  "tensor_int8",
-      "tensor_int16", "tensor_int64",  "tensor_string", "tensor_complex64",
-      "tensor_half"};
+      "tensor_bool",  "tensor_int",        "tensor_float",  "tensor_double",
+      "tensor_qint8", "tensor_qint32",     "tensor_uint8",  "tensor_int8",
+      "tensor_int16", "tensor_int64",      "tensor_string", "tensor_complex64",
+      "tensor_half",  "tensor_float_empty"};
 
   // We first need to write a tensor using the save_op
   {
@@ -164,6 +164,11 @@ TEST_F(RestoreOpTest, RestoreSimple) {
           return static_cast<Eigen::half>(x) / Eigen::half(5);
         });
     inputs.push_back({nullptr, &input_14});
+    // Input #15 is a 2-d empty float tensor
+    Tensor input_15 = MakeInput<float>(TensorShape({2, 0}), [](int x) -> float {
+      return static_cast<float>(x) / 10;
+    });
+    inputs.push_back({nullptr, &input_15});
     OpKernelContext::Params params;
     params.device = device.get();
     params.frame_iter = FrameAndIter(0, 0);
@@ -341,6 +346,15 @@ TEST_F(RestoreOpTest, RestoreSimple) {
                 output->flat<Eigen::half>()(i));
     }
   }
+  // The 2-d empty float tensor
+  {
+    MakeRestoreOp(DT_FLOAT);
+    (*mutable_input(1).tensor).scalar<string>()() = tensor_names[13];
+    TF_ASSERT_OK(RunOpKernel());
+    Tensor* output = GetOutput(0);
+    TensorShape expected({2, 0});
+    EXPECT_TRUE(output->shape().IsSameSize(expected));
+  }
 }
 
 class RestoreSliceOpTest : public OpsTestBase {
diff --git a/tensorflow/core/kernels/save_restore_tensor.cc b/tensorflow/core/kernels/save_restore_tensor.cc
index f4c8d83101d..9e0b59f1251 100644
--- a/tensorflow/core/kernels/save_restore_tensor.cc
+++ b/tensorflow/core/kernels/save_restore_tensor.cc
@@ -212,6 +212,8 @@ void RestoreTensor(OpKernelContext* context,
   Tensor* t = nullptr;
   OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &t));
 
+  if (output_shape.num_elements() == 0) return;
+
 #define READER_COPY(T)                                                      \
   case DataTypeToEnum<T>::value:                                            \
     reader->CopySliceData(tensor_name, slice_to_load, t->flat<T>().data()); \
diff --git a/tensorflow/core/kernels/scan_ops.cc b/tensorflow/core/kernels/scan_ops.cc
index 604e712b0fd..2604b738448 100644
--- a/tensorflow/core/kernels/scan_ops.cc
+++ b/tensorflow/core/kernels/scan_ops.cc
@@ -58,7 +58,7 @@ public:
         errors::InvalidArgument("ScanOp: Expected scan axis in the range [", 0,
                                 ", ", input.dims(), "), but got ", axis));
 
-    TensorShape output_shape = input.shape();
+    const TensorShape& output_shape = input.shape();
     Tensor* output = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output));
 
diff --git a/tensorflow/core/kernels/svd_op.cc b/tensorflow/core/kernels/svd_op.cc
index 36d04989581..74b5627024d 100644
--- a/tensorflow/core/kernels/svd_op.cc
+++ b/tensorflow/core/kernels/svd_op.cc
@@ -81,12 +81,12 @@ class SvdOp : public LinearAlgebraOp<Scalar> {
       svd.compute(inputs[0],
                   (full_matrices_ ? Eigen::ComputeFullU | Eigen::ComputeFullV
                                   : Eigen::ComputeThinU | Eigen::ComputeThinV));
-      outputs->at(0) = svd.singularValues();
+      outputs->at(0) = svd.singularValues().template cast<Scalar>();
       outputs->at(1) = svd.matrixU();
       outputs->at(2) = svd.matrixV();
     } else {
       svd.compute(inputs[0]);
-      outputs->at(0) = svd.singularValues();
+      outputs->at(0) = svd.singularValues().template cast<Scalar>();
     }
   }
 
diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc
index 73b0280a8f0..22801859e88 100644
--- a/tensorflow/core/lib/io/record_reader.cc
+++ b/tensorflow/core/lib/io/record_reader.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/coding.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/hash/crc32c.h"
+#include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/platform/env.h"
 
 namespace tensorflow {
@@ -33,8 +34,9 @@ RecordReader::RecordReader(RandomAccessFile* file,
 #if defined(IS_SLIM_BUILD)
     LOG(FATAL) << "Zlib compression is unsupported on mobile platforms.";
 #else   // IS_SLIM_BUILD
-    zlib_input_buffer_.reset(new ZlibInputBuffer(
-        src_, options.zlib_options.input_buffer_size,
+    random_input_stream_.reset(new RandomAccessInputStream(file));
+    zlib_input_stream_.reset(new ZlibInputStream(
+        random_input_stream_.get(), options.zlib_options.input_buffer_size,
         options.zlib_options.output_buffer_size, options.zlib_options));
 #endif  // IS_SLIM_BUILD
   } else if (options.compression_type == RecordReaderOptions::NONE) {
@@ -44,7 +46,10 @@ RecordReader::RecordReader(RandomAccessFile* file,
   }
 }
 
-RecordReader::~RecordReader() {}
+RecordReader::~RecordReader() {
+  zlib_input_stream_.reset(nullptr);
+  random_input_stream_.reset(nullptr);
+}
 
 // Read n+4 bytes from file, verify that checksum of first n bytes is
 // stored in the last 4 bytes and store the first n bytes in *result.
@@ -59,7 +64,7 @@ Status RecordReader::ReadChecksummed(uint64 offset, size_t n,
   storage->resize(expected);
 
 #if !defined(IS_SLIM_BUILD)
-  if (zlib_input_buffer_) {
+  if (zlib_input_stream_) {
     // If we have a zlib compressed buffer, we assume that the
     // file is being read sequentially, and we use the underlying
     // implementation to read the data.
@@ -67,7 +72,7 @@ Status RecordReader::ReadChecksummed(uint64 offset, size_t n,
     // No checks are done to validate that the file is being read
     // sequentially.  At some point the zlib input buffer may support
     // seeking, possibly inefficiently.
-    TF_RETURN_IF_ERROR(zlib_input_buffer_->ReadNBytes(expected, storage));
+    TF_RETURN_IF_ERROR(zlib_input_stream_->ReadNBytes(expected, storage));
 
     if (storage->size() != expected) {
       if (storage->size() == 0) {
diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h
index e6e2a8c8abb..fb675ac98f5 100644
--- a/tensorflow/core/lib/io/record_reader.h
+++ b/tensorflow/core/lib/io/record_reader.h
@@ -18,10 +18,10 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
-#include "tensorflow/core/lib/io/inputbuffer.h"
 #if !defined(IS_SLIM_BUILD)
+#include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
-#include "tensorflow/core/lib/io/zlib_inputbuffer.h"
+#include "tensorflow/core/lib/io/zlib_inputstream.h"
 #endif  // IS_SLIM_BUILD
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@@ -64,7 +64,8 @@ class RecordReader {
   RandomAccessFile* src_;
   RecordReaderOptions options_;
 #if !defined(IS_SLIM_BUILD)
-  std::unique_ptr<ZlibInputBuffer> zlib_input_buffer_;
+  std::unique_ptr<RandomAccessInputStream> random_input_stream_;
+  std::unique_ptr<ZlibInputStream> zlib_input_stream_;
 #endif  // IS_SLIM_BUILD
 
   TF_DISALLOW_COPY_AND_ASSIGN(RecordReader);
diff --git a/tensorflow/core/lib/io/zlib_buffers_test.cc b/tensorflow/core/lib/io/zlib_buffers_test.cc
index 3118ab74b95..eaaf1497594 100644
--- a/tensorflow/core/lib/io/zlib_buffers_test.cc
+++ b/tensorflow/core/lib/io/zlib_buffers_test.cc
@@ -14,13 +14,14 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/io/inputbuffer.h"
+#include "tensorflow/core/lib/io/random_inputstream.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
-#include "tensorflow/core/lib/io/zlib_inputbuffer.h"
+#include "tensorflow/core/lib/io/zlib_inputstream.h"
 #include "tensorflow/core/lib/io/zlib_outputbuffer.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 
 namespace tensorflow {
+namespace io {
 
 static std::vector<int> InputBufferSizes() {
   return {10, 100, 200, 500, 1000, 10000};
@@ -70,8 +71,8 @@ void TestAllCombinations(CompressionOptions input_options,
         TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
         string result;
 
-        io::ZlibOutputBuffer out(file_writer.get(), input_buf_size,
-                                 output_buf_size, output_options);
+        ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
+                             output_options);
 
         TF_CHECK_OK(out.Write(StringPiece(data)));
         TF_CHECK_OK(out.Close());
@@ -80,9 +81,11 @@ void TestAllCombinations(CompressionOptions input_options,
 
         std::unique_ptr<RandomAccessFile> file_reader;
         TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader));
-        io::ZlibInputBuffer in(file_reader.get(), input_buf_size,
-                               output_buf_size, input_options);
-        TF_CHECK_OK(in.ReadNBytes(data.size(), &result));
+        std::unique_ptr<RandomAccessInputStream> input_stream(
+            new RandomAccessInputStream(file_reader.get()));
+        ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
+                           input_options);
+        TF_EXPECT_OK(in.ReadNBytes(data.size(), &result));
         EXPECT_EQ(result, data);
       }
     }
@@ -115,8 +118,8 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
   string expected_result;
 
   TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
-  io::ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
-                           output_options);
+  ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
+                       output_options);
 
   for (int i = 0; i < num_writes; i++) {
     TF_CHECK_OK(out.Write(StringPiece(data)));
@@ -131,12 +134,14 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
 
   std::unique_ptr<RandomAccessFile> file_reader;
   TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader));
-  io::ZlibInputBuffer in(file_reader.get(), input_buf_size, output_buf_size,
-                         input_options);
+  std::unique_ptr<RandomAccessInputStream> input_stream(
+      new RandomAccessInputStream(file_reader.get()));
+  ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
+                     input_options);
 
   for (int i = 0; i < num_writes; i++) {
     string decompressed_output;
-    TF_CHECK_OK(in.ReadNBytes(data.size(), &decompressed_output));
+    TF_EXPECT_OK(in.ReadNBytes(data.size(), &decompressed_output));
     strings::StrAppend(&actual_result, decompressed_output);
   }
 
@@ -151,7 +156,7 @@ TEST(ZlibBuffers, MultipleWriteCallsWithFlush) {
   TestMultipleWrites(200, 200, 10, true);
 }
 
-TEST(ZlibInputBuffer, FailsToReadIfWindowBitsAreIncompatible) {
+TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) {
   Env* env = Env::Default();
   string fname = testing::TmpDir() + "/zlib_buffers_test";
   CompressionOptions output_options = CompressionOptions::DEFAULT();
@@ -165,8 +170,8 @@ TEST(ZlibInputBuffer, FailsToReadIfWindowBitsAreIncompatible) {
   std::unique_ptr<WritableFile> file_writer;
   TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
   string result;
-  io::ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
-                           output_options);
+  ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
+                       output_options);
 
   TF_CHECK_OK(out.Write(StringPiece(data)));
   TF_CHECK_OK(out.Close());
@@ -175,11 +180,14 @@ TEST(ZlibInputBuffer, FailsToReadIfWindowBitsAreIncompatible) {
 
   std::unique_ptr<RandomAccessFile> file_reader;
   TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader));
-  io::ZlibInputBuffer in(file_reader.get(), input_buf_size, output_buf_size,
-                         input_options);
+  std::unique_ptr<RandomAccessInputStream> input_stream(
+      new RandomAccessInputStream(file_reader.get()));
+  ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
+                     input_options);
   Status read_status = in.ReadNBytes(data.size(), &result);
   CHECK_EQ(read_status.code(), error::DATA_LOSS);
   CHECK(read_status.error_message().find("inflate() failed") != string::npos);
 }
 
+}  // namespace io
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/io/zlib_inputbuffer.cc b/tensorflow/core/lib/io/zlib_inputstream.cc
similarity index 76%
rename from tensorflow/core/lib/io/zlib_inputbuffer.cc
rename to tensorflow/core/lib/io/zlib_inputstream.cc
index 8c0d9c800b5..b13931c4402 100644
--- a/tensorflow/core/lib/io/zlib_inputbuffer.cc
+++ b/tensorflow/core/lib/io/zlib_inputstream.cc
@@ -13,20 +13,20 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/lib/io/zlib_inputbuffer.h"
+#include "tensorflow/core/lib/io/zlib_inputstream.h"
+
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
 namespace io {
 
-ZlibInputBuffer::ZlibInputBuffer(
-    RandomAccessFile* file,
+ZlibInputStream::ZlibInputStream(
+    InputStreamInterface* input_stream,
     size_t input_buffer_bytes,   // size of z_stream.next_in buffer
     size_t output_buffer_bytes,  // size of z_stream.next_out buffer
     const ZlibCompressionOptions& zlib_options)
-    : file_(file),
-      file_pos_(0),
+    : input_stream_(input_stream),
       input_buffer_capacity_(input_buffer_bytes),
       output_buffer_capacity_(output_buffer_bytes),
       z_stream_input_(new Bytef[input_buffer_capacity_]),
@@ -48,19 +48,19 @@ ZlibInputBuffer::ZlibInputBuffer(
   } else {
     z_stream_->next_in = z_stream_input_.get();
     z_stream_->next_out = z_stream_output_.get();
-    next_unread_byte_ = (char*)z_stream_output_.get();
+    next_unread_byte_ = reinterpret_cast<char*>(z_stream_output_.get());
     z_stream_->avail_in = 0;
     z_stream_->avail_out = output_buffer_capacity_;
   }
 }
 
-ZlibInputBuffer::~ZlibInputBuffer() {
+ZlibInputStream::~ZlibInputStream() {
   if (z_stream_.get()) {
     inflateEnd(z_stream_.get());
   }
 }
 
-Status ZlibInputBuffer::ReadFromFile() {
+Status ZlibInputStream::ReadFromStream() {
   int bytes_to_read = input_buffer_capacity_;
   char* read_location = reinterpret_cast<char*>(z_stream_input_.get());
 
@@ -77,12 +77,12 @@ Status ZlibInputBuffer::ReadFromFile() {
     bytes_to_read -= z_stream_->avail_in;
     read_location += z_stream_->avail_in;
   }
-  StringPiece data;
+  string data;
   // Try to read enough data to fill up z_stream_input_.
-  Status s = file_->Read(file_pos_, bytes_to_read, &data, read_location);
-  if (data.data() != read_location) {
-    memmove(read_location, data.data(), data.size());
-  }
+  // TODO(rohanj): Add a char* version of ReadNBytes to InputStreamInterface
+  // and use that instead to make this more efficient.
+  Status s = input_stream_->ReadNBytes(bytes_to_read, &data);
+  memcpy(read_location, data.data(), data.size());
 
   // Since we moved unread data to the head of the input stream we can point
   // next_in to the head of the input stream.
@@ -90,16 +90,15 @@ Status ZlibInputBuffer::ReadFromFile() {
 
   // Note: data.size() could be different from bytes_to_read.
   z_stream_->avail_in += data.size();
-  file_pos_ += data.size();
 
   if (!s.ok() && !errors::IsOutOfRange(s)) {
     return s;
   }
 
-  // We throw OutOfRange error iff no new data has been read from file.
-  // Since we never check how much data is remaining in the file, it is
-  // possible that on the last read there isn't enough data in the file to
-  // fill up the buffer in which case file_->ReadNBytes would return an
+  // We throw OutOfRange error iff no new data has been read from stream.
+  // Since we never check how much data is remaining in the stream, it is
+  // possible that on the last read there isn't enough data in the stream to
+  // fill up the buffer in which case input_stream_->ReadNBytes would return an
   // OutOfRange error.
   if (data.size() == 0) {
     return errors::OutOfRange("EOF reached");
@@ -111,9 +110,10 @@ Status ZlibInputBuffer::ReadFromFile() {
   return s;
 }
 
-size_t ZlibInputBuffer::ReadBytesFromCache(size_t bytes_to_read,
+size_t ZlibInputStream::ReadBytesFromCache(size_t bytes_to_read,
                                            string* result) {
-  size_t unread_bytes = (char*)z_stream_->next_out - next_unread_byte_;
+  size_t unread_bytes =
+      reinterpret_cast<char*>(z_stream_->next_out) - next_unread_byte_;
   size_t can_read_bytes = std::min(bytes_to_read, unread_bytes);
   if (can_read_bytes > 0) {
     result->append(next_unread_byte_, can_read_bytes);
@@ -122,34 +122,35 @@ size_t ZlibInputBuffer::ReadBytesFromCache(size_t bytes_to_read,
   return can_read_bytes;
 }
 
-size_t ZlibInputBuffer::NumUnreadBytes() const {
-  size_t read_bytes = next_unread_byte_ - (char*)z_stream_output_.get();
+size_t ZlibInputStream::NumUnreadBytes() const {
+  size_t read_bytes =
+      next_unread_byte_ - reinterpret_cast<char*>(z_stream_output_.get());
   return output_buffer_capacity_ - z_stream_->avail_out - read_bytes;
 }
 
-Status ZlibInputBuffer::ReadNBytes(int64 bytes_to_read, string* result) {
+Status ZlibInputStream::ReadNBytes(int64 bytes_to_read, string* result) {
   result->clear();
   // Read as many bytes as possible from cache.
   bytes_to_read -= ReadBytesFromCache(bytes_to_read, result);
 
   while (bytes_to_read > 0) {
     // At this point we can be sure that cache has been emptied.
-    DCHECK(NumUnreadBytes() == 0);
+    DCHECK_EQ(NumUnreadBytes(), 0);
 
     // Now that the cache is empty we need to inflate more data.
 
     // Step 1. Fill up input buffer.
-    // We read from file only after the previously read contents have been
+    // We read from stream only after the previously read contents have been
     // completely consumed. This is an optimization and can be removed if
-    // it causes problems. `ReadFromFile` is capable of handling partially
+    // it causes problems. `ReadFromStream` is capable of handling partially
     // filled up buffers.
     if (z_stream_->avail_in == 0) {
-      TF_RETURN_IF_ERROR(ReadFromFile());
+      TF_RETURN_IF_ERROR(ReadFromStream());
     }
 
     // Step 2. Setup output stream.
     z_stream_->next_out = z_stream_output_.get();
-    next_unread_byte_ = (char*)z_stream_output_.get();
+    next_unread_byte_ = reinterpret_cast<char*>(z_stream_output_.get());
     z_stream_->avail_out = output_buffer_capacity_;
 
     // Step 3. Inflate Inflate Inflate!
@@ -161,7 +162,10 @@ Status ZlibInputBuffer::ReadNBytes(int64 bytes_to_read, string* result) {
   return Status::OK();
 }
 
-Status ZlibInputBuffer::Inflate() {
+// TODO(srbs): Implement this.
+int64 ZlibInputStream::Tell() const { return -1; }
+
+Status ZlibInputStream::Inflate() {
   int error = inflate(z_stream_.get(), zlib_options_.flush_mode);
   if (error != Z_OK && error != Z_STREAM_END) {
     string error_string =
diff --git a/tensorflow/core/lib/io/zlib_inputbuffer.h b/tensorflow/core/lib/io/zlib_inputstream.h
similarity index 66%
rename from tensorflow/core/lib/io/zlib_inputbuffer.h
rename to tensorflow/core/lib/io/zlib_inputstream.h
index 008d51876aa..c79c04d9535 100644
--- a/tensorflow/core/lib/io/zlib_inputbuffer.h
+++ b/tensorflow/core/lib/io/zlib_inputstream.h
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_LIB_IO_COMPRESSED_INPUTBUFFER_H_
-#define TENSORFLOW_LIB_IO_COMPRESSED_INPUTBUFFER_H_
+#ifndef TENSORFLOW_LIB_IO_ZLIB_INPUTSTREAM_H_
+#define TENSORFLOW_LIB_IO_ZLIB_INPUTSTREAM_H_
 
 #include <string>
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/io/inputstream_interface.h"
 #include "tensorflow/core/lib/io/zlib_compression_options.h"
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/macros.h"
@@ -34,47 +35,48 @@ limitations under the License.
 namespace tensorflow {
 namespace io {
 
-// An ZlibInputBuffer provides support for reading from a file compressed using
-// zlib (http://www.zlib.net/).
+// A ZlibInputStream provides support for reading from a stream compressed
+// using zlib (http://www.zlib.net/). Buffers the contents of the stream.
 //
-// A given instance of an ZlibInputBuffer is NOT safe for concurrent use
+// A given instance of a ZlibInputStream is NOT safe for concurrent use
 // by multiple threads
-class ZlibInputBuffer {
+class ZlibInputStream : public InputStreamInterface {
  public:
-  // Create a ZlibInputBuffer for `file` with a buffer of size
-  // `input_buffer_bytes` bytes for reading contents from `file` and another
-  // buffer with size `output_buffer_bytes` for caching decompressed contents.
-  // Does *not* take ownership of "file".
-  ZlibInputBuffer(RandomAccessFile* file, size_t input_buffer_bytes,
+  // Create a ZlibInputStream for `input_stream` with a buffer of size
+  // `input_buffer_bytes` bytes for reading contents from `input_stream` and
+  // another buffer with size `output_buffer_bytes` for caching decompressed
+  // contents. Does *not* take ownership of "input_stream".
+  ZlibInputStream(InputStreamInterface* input_stream, size_t input_buffer_bytes,
                   size_t output_buffer_bytes,
                   const ZlibCompressionOptions& zlib_options);
 
-  ~ZlibInputBuffer();
+  ~ZlibInputStream();
 
   // Reads bytes_to_read bytes into *result, overwriting *result.
   //
   // Return Status codes:
   // OK:           If successful.
   // OUT_OF_RANGE: If there are not enough bytes to read before
-  //               the end of the file.
+  //               the end of the stream.
   // ABORTED:      If inflate() fails, we return the error code with the
   //               error message in `z_stream_->msg`.
-  // others:       If reading from file failed.
-  Status ReadNBytes(int64 bytes_to_read, string* result);
+  // others:       If reading from stream failed.
+  Status ReadNBytes(int64 bytes_to_read, string* result) override;
+
+  int64 Tell() const override;
 
  private:
-  RandomAccessFile* file_;         // Not owned
-  int64 file_pos_;                 // Next position to read from in `file_`
-  size_t input_buffer_capacity_;   // Size of `z_stream_input_`
-  size_t output_buffer_capacity_;  // Size of `z_stream_output_`
-  char* next_unread_byte_;         // Next unread byte in `z_stream_output_`
+  InputStreamInterface* input_stream_;  // Not owned
+  size_t input_buffer_capacity_;        // Size of z_stream_input_
+  size_t output_buffer_capacity_;       // Size of z_stream_output_
+  char* next_unread_byte_;              // Next unread byte in z_stream_output_
 
-  // Buffer for storing contents read from compressed file.
+  // Buffer for storing contents read from compressed stream.
   // TODO(srbs): Consider using circular buffers. That would greatly simplify
   // the implementation.
   std::unique_ptr<Bytef[]> z_stream_input_;
 
-  // Buffer for storing inflated contents of `file_`.
+  // Buffer for storing inflated contents of `input_stream_`.
   std::unique_ptr<Bytef[]> z_stream_output_;
 
   ZlibCompressionOptions const zlib_options_;
@@ -92,8 +94,8 @@ class ZlibInputBuffer {
   //   Number of free bytes available at write location.
   std::unique_ptr<z_stream> z_stream_;
 
-  // Reads data from `file_` and tries to fill up `z_stream_input_` if enough
-  // unread data is left in `file_`.
+  // Reads data from `input_stream_` and tries to fill up `z_stream_input_` if
+  // enough unread data is left in `input_stream_`.
   //
   // Looks up z_stream_->next_in to check how much data in z_stream_input_
   // has already been read. The used data is removed and new data is added to
@@ -102,10 +104,10 @@ class ZlibInputBuffer {
   // and z_stream_->avail_in stores the number of readable bytes in
   // z_stream_input_.
   //
-  // Returns OutOfRange error if NO data could be read from file. Note that this
-  // won't return an OutOfRange if there wasn't sufficient data in file to
-  // completely fill up z_stream_input_.
-  Status ReadFromFile();
+  // Returns OutOfRange error if NO data could be read from stream. Note that
+  // this won't return an OutOfRange if there wasn't sufficient data in stream
+  // to completely fill up z_stream_input_.
+  Status ReadFromStream();
 
   // Calls `inflate()` and returns DataLoss Status if it failed.
   Status Inflate();
@@ -131,10 +133,10 @@ class ZlibInputBuffer {
   // Returns the size of [next_unread_byte_, z_stream_->next_out)
   size_t NumUnreadBytes() const;
 
-  TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputBuffer);
+  TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputStream);
 };
 
 }  // namespace io
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_LIB_IO_CompressedInputBuffer_H_
+#endif  // TENSORFLOW_LIB_IO_ZLIB_INPUTSTREAM_H_
diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc
index 9a317f1fd2c..ac12798322b 100644
--- a/tensorflow/core/lib/jpeg/jpeg_mem.cc
+++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <algorithm>
 #include <memory>
 #include <string>
+#include <utility>
 
 #include "tensorflow/core/lib/jpeg/jpeg_handle.h"
 #include "tensorflow/core/platform/logging.h"
@@ -52,7 +53,7 @@ class FewerArgsForCompiler {
       : datasize_(datasize),
         flags_(flags),
         pnwarn_(nwarn),
-        allocate_output_(allocate_output),
+        allocate_output_(std::move(allocate_output)),
         height_read_(0),
         height_(0),
         stride_(0) {
diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
index 70c1b315c89..8543aa0cb0d 100644
--- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt
@@ -5460,6 +5460,51 @@ op {
     }
   }
 }
+op {
+  name: "BatchSvd"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "s"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "u"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  attr {
+    name: "compute_uv"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "full_matrices"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "BatchToSpace"
   input_arg {
@@ -10578,6 +10623,71 @@ op {
     }
   }
 }
+op {
+  name: "FusedResizeAndPadConv2D"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "paddings"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "resize_align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+}
 op {
   name: "Gather"
   input_arg {
@@ -26364,6 +26474,51 @@ op {
     }
   }
 }
+op {
+  name: "Svd"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "s"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "u"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "v"
+    type_attr: "T"
+  }
+  attr {
+    name: "compute_uv"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  attr {
+    name: "full_matrices"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_DOUBLE
+        type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
+      }
+    }
+  }
+}
 op {
   name: "Switch"
   input_arg {
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index cc374278e7f..5daaf83133a 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -17,6 +17,7 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/util/mirror_pad_mode.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 
@@ -425,6 +426,46 @@ data_format: Specify the data format of the input and output data. With the
         [batch, in_channels, in_height, in_width].
 )doc");
 
+REGISTER_OP("FusedResizeAndPadConv2D")
+    .Input("input: T")
+    .Input("size: int32")
+    .Input("paddings: int32")
+    .Input("filter: T")
+    .Output("output: T")
+    .Attr("T: {half, float, double}")
+    .Attr("resize_align_corners: bool = false")
+    .Attr(GetMirrorPadModeAttrString())
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .Doc(R"doc(
+Performs a resize and padding as a preprocess during a convolution.
+
+It's often possible to do spatial transformations more efficiently as part of
+the packing stage of a convolution, so this op allows for an optimized
+implementation where these stages are fused together. This prevents the need to
+write out the intermediate results as whole tensors, reducing memory pressure,
+and we can get some latency gains by merging the transformation calculations.
+The data_format attribute for Conv2D isn't supported by this op, and defaults to
+'NHWC' order.
+Internally this op uses a single per-graph scratch buffer, which means that it
+will block if multiple versions are being run in parallel. This is because this
+operator is primarily an optimization to minimize memory usage.
+
+input: 4-D with shape `[batch, in_height, in_width, in_channels]`.
+size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The
+  new size for the images.
+paddings: A two-column matrix specifying the padding sizes. The number of
+  rows must be the same as the rank of `input`.
+filter: 4-D with shape
+  `[filter_height, filter_width, in_channels, out_channels]`.
+resize_align_corners: If true, rescale input by (new_height - 1) / (height - 1),
+  which exactly aligns the 4 corners of images and resized images. If false, rescale
+  by new_height / height. Treat similarly the width dimension.
+strides: 1-D of length 4.  The stride of the sliding window for each dimension
+   of `input`. Must be in the same order as the dimension specified with format.
+padding: The type of padding algorithm to use.
+ )doc");
+
 // --------------------------------------------------------------------------
 
 REGISTER_OP("DepthwiseConv2dNative")
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 3f55b395cde..946e098eba2 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -2688,6 +2688,8 @@ op {
       list {
         type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
@@ -6528,6 +6530,80 @@ op {
   }
   summary: "Computes gradient of the FractionalMaxPool function."
 }
+op {
+  name: "FusedResizeAndPadConv2D"
+  input_arg {
+    name: "input"
+    description: "4-D with shape `[batch, in_height, in_width, in_channels]`."
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    description: "A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The\nnew size for the images."
+    type: DT_INT32
+  }
+  input_arg {
+    name: "paddings"
+    description: "A two-column matrix specifying the padding sizes. The number of\nrows must be the same as the rank of `input`."
+    type: DT_INT32
+  }
+  input_arg {
+    name: "filter"
+    description: "4-D with shape\n`[filter_height, filter_width, in_channels, out_channels]`."
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "resize_align_corners"
+    type: "bool"
+    default_value {
+      b: false
+    }
+    description: "If true, rescale input by (new_height - 1) / (height - 1),\nwhich exactly aligns the 4 corners of images and resized images. If false, rescale\nby new_height / height. Treat similarly the width dimension."
+  }
+  attr {
+    name: "mode"
+    type: "string"
+    allowed_values {
+      list {
+        s: "REFLECT"
+        s: "SYMMETRIC"
+      }
+    }
+  }
+  attr {
+    name: "strides"
+    type: "list(int)"
+    description: "1-D of length 4.  The stride of the sliding window for each dimension\nof `input`. Must be in the same order as the dimension specified with format."
+  }
+  attr {
+    name: "padding"
+    type: "string"
+    description: "The type of padding algorithm to use."
+    allowed_values {
+      list {
+        s: "SAME"
+        s: "VALID"
+      }
+    }
+  }
+  summary: "Performs a resize and padding as a preprocess during a convolution."
+  description: "It\'s often possible to do spatial transformations more efficiently as part of\nthe packing stage of a convolution, so this op allows for an optimized\nimplementation where these stages are fused together. This prevents the need to\nwrite out the intermediate results as whole tensors, reducing memory pressure,\nand we can get some latency gains by merging the transformation calculations.\nThe data_format attribute for Conv2D isn\'t supported by this op, and defaults to\n\'NHWC\' order.\nInternally this op uses a single per-graph scratch buffer, which means that it\nwill block if multiple versions are being run in parallel. This is because this\noperator is primarily an optimization to minimize memory usage."
+}
 op {
   name: "Gather"
   input_arg {
@@ -16111,6 +16187,8 @@ op {
       list {
         type: DT_DOUBLE
         type: DT_FLOAT
+        type: DT_COMPLEX64
+        type: DT_COMPLEX128
       }
     }
   }
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 7426006cec6..fc35c293d27 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -92,7 +92,7 @@ class GcsRandomAccessFile : public RandomAccessFile {
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
-        http_request_factory_(std::move(http_request_factory)),
+        http_request_factory_(http_request_factory),
         read_ahead_bytes_(read_ahead_bytes) {}
 
   /// The implementation of reads with a read-ahead buffer.
@@ -189,7 +189,7 @@ class GcsWritableFile : public WritableFile {
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
-        http_request_factory_(std::move(http_request_factory)) {
+        http_request_factory_(http_request_factory) {
     if (GetTmpFilename(&tmp_content_filename_).ok()) {
       outfile_.open(tmp_content_filename_,
                     std::ofstream::binary | std::ofstream::app);
@@ -208,7 +208,7 @@ class GcsWritableFile : public WritableFile {
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
-        http_request_factory_(std::move(http_request_factory)) {
+        http_request_factory_(http_request_factory) {
     tmp_content_filename_ = tmp_content_filename;
     outfile_.open(tmp_content_filename_,
                   std::ofstream::binary | std::ofstream::app);
diff --git a/tensorflow/core/platform/hexagon/profile_utils/cpu_utils.cc b/tensorflow/core/platform/profile_utils/cpu_utils.cc
similarity index 77%
rename from tensorflow/core/platform/hexagon/profile_utils/cpu_utils.cc
rename to tensorflow/core/platform/profile_utils/cpu_utils.cc
index d94d9411f18..7e13c39b624 100644
--- a/tensorflow/core/platform/hexagon/profile_utils/cpu_utils.cc
+++ b/tensorflow/core/platform/profile_utils/cpu_utils.cc
@@ -13,8 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/core/platform/hexagon/profile_utils/cpu_utils.h"
-
+#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
@@ -24,11 +23,7 @@ namespace {
 
 const class StaticVariableInitializer {
  public:
-  StaticVariableInitializer() {
-    CpuUtils::GetCpuFrequency();
-    CpuUtils::GetClockPerMicroSec();
-    CpuUtils::GetMicroSecPerClock();
-  }
+  StaticVariableInitializer() { CpuUtils::Initialize(); }
 } STATIC_VARIABLE_INITIALIZER;
 
 }  // anonymous namespace for initializer
@@ -52,6 +47,21 @@ const class StaticVariableInitializer {
   return micro_sec_per_clock;
 }
 
+/* static */ void CpuUtils::Initialize() {
+  CpuUtils::GetCpuFrequency();
+  CpuUtils::GetClockPerMicroSec();
+  CpuUtils::GetMicroSecPerClock();
+  GetCpuUtilsHelper().Initialize();
+}
+
+/* static */ void CpuUtils::ResetClockCycle() {
+  GetCpuUtilsHelper().ResetClockCycle();
+}
+
+/* static */ void CpuUtils::EnableClockCycleProfile(const bool enable) {
+  GetCpuUtilsHelper().EnableClockCycleProfile(enable);
+}
+
 /* static */ int64 CpuUtils::GetCpuFrequencyImpl() {
 #if defined(__linux__)
   double bogomips;
@@ -82,5 +92,11 @@ const class StaticVariableInitializer {
 #endif
 }
 
+/* static */ ICpuUtilsHelper& CpuUtils::GetCpuUtilsHelper() {
+  // TODO(satok): Change CpuUtilsHelper by cpu architecture
+  static DefaultCpuUtilsHelper cpu_utils_helper;
+  return cpu_utils_helper;
+}
+
 }  // namespace profile_utils
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/hexagon/profile_utils/cpu_utils.h b/tensorflow/core/platform/profile_utils/cpu_utils.h
similarity index 75%
rename from tensorflow/core/platform/hexagon/profile_utils/cpu_utils.h
rename to tensorflow/core/platform/profile_utils/cpu_utils.h
index 8e08f17fc63..3155c88a113 100644
--- a/tensorflow/core/platform/hexagon/profile_utils/cpu_utils.h
+++ b/tensorflow/core/platform/profile_utils/cpu_utils.h
@@ -14,10 +14,11 @@ limitations under the License.
 ==============================================================================*/
 // This class is designed to get accurate profile for programs.
 
-#ifndef TENSORFLOW_PLATFORM_HEXAGON_PROFILEUTILS_CPU_UTILS_H__
-#define TENSORFLOW_PLATFORM_HEXAGON_PROFILEUTILS_CPU_UTILS_H__
+#ifndef TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__
+#define TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__
 
 #include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h"
 #include "tensorflow/core/platform/types.h"
 
 #if defined(ARMV6) || defined(__ARM_ARCH_7A__)
@@ -35,12 +36,12 @@ class CpuUtils {
   static constexpr int64 INVALID_FREQUENCY = -1;
   static constexpr uint64 DUMMY_CYCLE_CLOCK = 1;
 
-  // Return cpu count obtained by rdtsc. This function is designed to
+  // Return current clock cycle. This function is designed to
   // minimize the overhead to get clock and maximize the accuracy of
   // time for profile.
   // This returns unsigned int because there is no guarantee that rdtsc
   // is less than 2 ^ 61.
-  static inline uint64 GetCurrentCycleClock() {
+  static inline uint64 GetCurrentClockCycle() {
 // ----------------------------------------------------------------
 #if defined(__x86_64__) || defined(__amd64__)
     uint64_t high, low;
@@ -95,13 +96,41 @@ class CpuUtils {
   // there is no overhead except function call to call this method.
   static double GetMicroSecPerClock();
 
+  // Initialize CpuUtils.
+  // This method is called from the static initializer declared in cpu_utils.cc.
+  // It initializes state and cached static variables declared in functions.
+  static void Initialize();
+
+  // Reset clock cycle
+  // Resetting clock cycle is recommended to prevent
+  // clock cycle counters from overflowing on some platforms.
+  static void ResetClockCycle();
+
+  // Enable or disable clock cycle profiling.
+  // This only has an effect if profiling is supported by the platform.
+  static void EnableClockCycleProfile(bool enable);
+
  private:
+  class DefaultCpuUtilsHelper : public ICpuUtilsHelper {
+   public:
+    DefaultCpuUtilsHelper() = default;
+    void Initialize() final {}
+    void ResetClockCycle() final {}
+    uint64 GetCurrentClockCycle() final { return DUMMY_CYCLE_CLOCK; }
+    void EnableClockCycleProfile(bool /* enable */) final {}
+
+   private:
+    TF_DISALLOW_COPY_AND_ASSIGN(DefaultCpuUtilsHelper);
+  };
+
   // Return cpu frequency.
   // CAVEAT: as this method calls system call and parse the mssage,
   // this call may be slow. This is why this class caches the value by
   // StaticVariableInitializer.
   static int64 GetCpuFrequencyImpl();
 
+  static ICpuUtilsHelper& GetCpuUtilsHelper();
+
   TF_DISALLOW_COPY_AND_ASSIGN(CpuUtils);
 };
 
@@ -109,4 +138,4 @@ class CpuUtils {
 
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_PLATFORM_HEXAGON_PROFILEUTILS_CPU_UTILS_H__
+#endif  // TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__
diff --git a/tensorflow/core/platform/hexagon/profile_utils/cpu_utils_test.cc b/tensorflow/core/platform/profile_utils/cpu_utils_test.cc
similarity index 87%
rename from tensorflow/core/platform/hexagon/profile_utils/cpu_utils_test.cc
rename to tensorflow/core/platform/profile_utils/cpu_utils_test.cc
index 0db1903695b..99b3cb234f2 100644
--- a/tensorflow/core/platform/hexagon/profile_utils/cpu_utils_test.cc
+++ b/tensorflow/core/platform/profile_utils/cpu_utils_test.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 // This class is designed to get accurate profiles for programs
 
-#include "tensorflow/core/platform/hexagon/profile_utils/cpu_utils.h"
+#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/test.h"
 
@@ -23,17 +23,17 @@ namespace profile_utils {
 
 static constexpr bool DBG = false;
 
-TEST(CpuUtils, CheckGetCurrentCycleClock) {
+TEST(CpuUtils, CheckGetCurrentClockCycle) {
   static constexpr int LOOP_COUNT = 10;
-  const uint64 start_clock_count = CpuUtils::GetCurrentCycleClock();
+  const uint64 start_clock_count = CpuUtils::GetCurrentClockCycle();
   CHECK_GT(start_clock_count, 0);
   uint64 prev_clock_count = start_clock_count;
   for (int i = 0; i < LOOP_COUNT; ++i) {
-    const uint64 clock_count = CpuUtils::GetCurrentCycleClock();
+    const uint64 clock_count = CpuUtils::GetCurrentClockCycle();
     CHECK_GE(clock_count, prev_clock_count);
     prev_clock_count = clock_count;
   }
-  const uint64 end_clock_count = CpuUtils::GetCurrentCycleClock();
+  const uint64 end_clock_count = CpuUtils::GetCurrentClockCycle();
   if (DBG) {
     LOG(INFO) << "start clock = " << start_clock_count;
     LOG(INFO) << "end clock = " << end_clock_count;
diff --git a/tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h b/tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h
new file mode 100644
index 00000000000..5e02df2ce8f
--- /dev/null
+++ b/tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h
@@ -0,0 +1,51 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_PROFILEUTILS_I_CPU_UTILS_HELPER_H__
+#define TENSORFLOW_PLATFORM_PROFILEUTILS_I_CPU_UTILS_HELPER_H__
+
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+namespace profile_utils {
+
+// ICpuUtilsHelper is the interface CpuUtils uses to hide differences between
+// the clock-cycle profiling facilities of different platforms.
+class ICpuUtilsHelper {
+ public:
+  ICpuUtilsHelper() = default;
+  virtual ~ICpuUtilsHelper() = default;
+  // Initializes the helper.
+  // This method is called only once when CpuUtils is loaded.
+  virtual void Initialize() = 0;
+  // Resets the clock cycle counter.
+  // Resetting is recommended to prevent clock cycle counters
+  // from overflowing on some platforms.
+  virtual void ResetClockCycle() = 0;
+  // Returns the current clock cycle.
+  virtual uint64 GetCurrentClockCycle() = 0;
+  // Enables (enable == true) or disables clock cycle profiling.
+  // Whether this has any effect depends on platform support.
+  virtual void EnableClockCycleProfile(bool enable) = 0;
+
+ private:
+  TF_DISALLOW_COPY_AND_ASSIGN(ICpuUtilsHelper);
+};
+
+}  // namespace profile_utils
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_PLATFORM_PROFILEUTILS_I_CPU_UTILS_HELPER_H__
diff --git a/tensorflow/core/util/presized_cuckoo_map.h b/tensorflow/core/util/presized_cuckoo_map.h
index a1ae3ec2485..cf3b8cf5b31 100644
--- a/tensorflow/core/util/presized_cuckoo_map.h
+++ b/tensorflow/core/util/presized_cuckoo_map.h
@@ -18,7 +18,6 @@ limitations under the License.
 
 #include <algorithm>
 #include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/platform/macros.h"
 
@@ -44,6 +43,32 @@ namespace tensorflow {
 // a good cuckoo path with less data movement (see
 // http://www.cs.cmu.edu/~dga/papers/cuckoo-eurosys14.pdf )
 
+namespace presized_cuckoo_map {
+// Returns the high 64 bits of the 128-bit product x * y, i.e. (x * y) >> 64
+// ("multiply high"). On x86-64 this is a single instruction, but not all
+// platforms support the __uint128_t type, so a portable implementation
+// built from 32-bit halves is provided as well.
+inline uint64 multiply_high_u64(uint64 x, uint64 y) {
+#if defined(__SIZEOF_INT128__)
+  return (uint64)(((__uint128_t)x * (__uint128_t)y) >> 64);
+#else
+  // No int128: split x and y into 32-bit halves and sum the partial products.
+  uint64 x_lo = x & 0xffffffff;
+  uint64 x_hi = x >> 32;
+  uint64 buckets_lo = y & 0xffffffff;
+  uint64 buckets_hi = y >> 32;
+  uint64 prod_hi = x_hi * buckets_hi;
+  uint64 prod_lo = x_lo * buckets_lo;
+  uint64 prod_mid1 = x_hi * buckets_lo;
+  uint64 prod_mid2 = x_lo * buckets_hi;
+  uint64 carry =
+      ((prod_mid1 & 0xffffffff) + (prod_mid2 & 0xffffffff) + (prod_lo >> 32)) >>
+      32;
+  return prod_hi + (prod_mid1 >> 32) + (prod_mid2 >> 32) + carry;
+#endif
+}
+}  // namespace presized_cuckoo_map
+
 template <class value>
 class PresizedCuckooMap {
  public:
@@ -67,18 +92,14 @@ class PresizedCuckooMap {
     }
     buckets_.clear();
     buckets_.resize(num_buckets_, empty_bucket);
-#if !defined(__GCUDACC__) && !defined(__GCUDACC_HOST__) && \
-    !defined(IS_MOBILE_PLATFORM)
-    buckets_divisor_ = Eigen::internal::TensorIntDivisor<uint64>(num_buckets_);
-#endif
   }
 
   // Returns false if k is already in table or if the table
   // is full; true otherwise.
   bool InsertUnique(const key_type k, const value& v) {
     uint64 tk = key_transform(k);
-    uint64 b1 = fast_mod_by_buckets(tk);
-    uint64 b2 = fast_mod_by_buckets(h2(tk));
+    uint64 b1 = fast_map_to_buckets(tk);
+    uint64 b2 = fast_map_to_buckets(h2(tk));
 
     // Merged find and duplicate checking.
     uint64 target_bucket = 0;
@@ -107,8 +128,8 @@ class PresizedCuckooMap {
   // Returns true if found.  Sets *out = value.
   bool Find(const key_type k, value* out) const {
     uint64 tk = key_transform(k);
-    return FindInBucket(k, fast_mod_by_buckets(tk), out) ||
-           FindInBucket(k, fast_mod_by_buckets(h2(tk)), out);
+    return FindInBucket(k, fast_map_to_buckets(tk), out) ||
+           FindInBucket(k, fast_map_to_buckets(h2(tk)), out);
   }
 
  private:
@@ -180,9 +201,9 @@ class PresizedCuckooMap {
       return e;
     }
 
-    bool empty() { return head_ == tail_; }
+    bool empty() const { return head_ == tail_; }
 
-    bool full() { return ((tail_ + 1) % kMaxQueueSize) == head_; }
+    bool full() const { return ((tail_ + 1) % kMaxQueueSize) == head_; }
 
     void reset() { head_ = tail_ = 0; }
 
@@ -210,13 +231,13 @@ class PresizedCuckooMap {
     return m * ((h >> 32) | (h << 32));
   }
 
-  // alt_bucket identifies the "other" bucket for key k, whether
+  // alt_bucket identifies the "other" bucket for key k, where
   // other is "the one that isn't bucket b"
   inline uint64 alt_bucket(key_type k, uint64 b) const {
-    if (fast_mod_by_buckets(k) != b) {
-      return fast_mod_by_buckets(k);
+    if (fast_map_to_buckets(k) != b) {
+      return fast_map_to_buckets(k);
     }
-    return fast_mod_by_buckets(h2(k));
+    return fast_map_to_buckets(h2(k));
   }
 
   inline void InsertInternal(key_type k, const value& v, uint64 b, int slot) {
@@ -306,22 +327,17 @@ class PresizedCuckooMap {
     return false;
   }
 
-  inline uint64 fast_mod_by_buckets(uint64 x) const {
-// Omitting the optimized bucket mod for CUDA platforms
-// until Eigen supports 2^63 divisors on GPU.
-#if !defined(__GCUDACC__) && !defined(__GCUDACC_HOST__) && \
-    !defined(IS_MOBILE_PLATFORM)
-    x &= ~(1ULL << 63);  // Fast div can only handle 2^63-1
-    return x - num_buckets_ * (x / buckets_divisor_);
-#else
-    return x % num_buckets_;
-#endif
+  inline uint64 fast_map_to_buckets(uint64 x) const {
+    // Map x (uniform in [0, 2^64)) to the range [0, num_buckets_ - 1]
+    // using Lemire's alternative to modulo reduction:
+    // http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+    // Instead of x % N, use (x * N) >> 64.
+    return presized_cuckoo_map::multiply_high_u64(x, num_buckets_);
   }
 
   // Set upon initialization: num_entries / kLoadFactor / kSlotsPerBucket.
   uint64 num_buckets_;
   std::vector<Bucket> buckets_;
-  Eigen::internal::TensorIntDivisor<uint64> buckets_divisor_;  // for fast mod
 
   std::unique_ptr<CuckooPathQueue> cpq_;
   CuckooPathEntry visited_[kVisitedListSize];
diff --git a/tensorflow/core/util/presized_cuckoo_map_test.cc b/tensorflow/core/util/presized_cuckoo_map_test.cc
index fe8e5dcfbd6..f2be1e8a2ff 100644
--- a/tensorflow/core/util/presized_cuckoo_map_test.cc
+++ b/tensorflow/core/util/presized_cuckoo_map_test.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/util/presized_cuckoo_map.h"
+#include <array>
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/fingerprint.h"
 #include "tensorflow/core/platform/test.h"
@@ -22,6 +23,25 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+TEST(PresizedCuckooMapTest, MultiplyHigh) {
+  struct Testcase {
+    uint64 x;
+    uint64 y;
+    uint64 result;
+  };
+  std::array<Testcase, 7> testcases{
+      {{0, 0, 0},
+       {0xffffffff, 0xffffffff, 0},
+       {0x2, 0xf000000000000000, 1},
+       {0x3, 0xf000000000000000, 2},
+       {0x3, 0xf000000000000001, 2},
+       {0x3, 0xffffffffffffffff, 2},
+       {0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffffe}}};
+  for (auto &tc : testcases) {
+    EXPECT_EQ(tc.result, presized_cuckoo_map::multiply_high_u64(tc.x, tc.y));
+  }
+}
+
 TEST(PresizedCuckooMapTest, Basic) {
   PresizedCuckooMap<int> pscm(1000);
   EXPECT_TRUE(pscm.InsertUnique(1, 2));
@@ -34,24 +54,31 @@ TEST(PresizedCuckooMapTest, TooManyItems) {
   static constexpr int kTableSize = 1000;
   PresizedCuckooMap<int> pscm(kTableSize);
   for (uint64 i = 0; i < kTableSize; i++) {
-    EXPECT_TRUE(pscm.InsertUnique(i, i));
+    uint64 key =
+        Fingerprint64(string(reinterpret_cast<char *>(&i), sizeof(int64)));
+    ASSERT_TRUE(pscm.InsertUnique(key, i));
   }
   // Try to over-fill the table.  A few of these
   // inserts will succeed, but should start failing.
   uint64 failed_at = 0;
   for (uint64 i = kTableSize; i < (2 * kTableSize); i++) {
-    if (!pscm.InsertUnique(i, i)) {
+    uint64 key =
+        Fingerprint64(string(reinterpret_cast<char *>(&i), sizeof(int64)));
+    if (!pscm.InsertUnique(key, i)) {
       failed_at = i;
       break;
     }
   }
   // Requirement 1:  Table must return failure when it's full.
   EXPECT_NE(failed_at, 0);
+
   // Requirement 2:  Table must preserve all items inserted prior
   // to the failure.
   for (uint64 i = 0; i < failed_at; i++) {
     int out;
-    EXPECT_TRUE(pscm.Find(i, &out));
+    uint64 key =
+        Fingerprint64(string(reinterpret_cast<char *>(&i), sizeof(int64)));
+    EXPECT_TRUE(pscm.Find(key, &out));
     EXPECT_EQ(out, i);
   }
 }
diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h
index 1665446aee4..b10f547a7f2 100644
--- a/tensorflow/core/util/sparse/sparse_tensor.h
+++ b/tensorflow/core/util/sparse/sparse_tensor.h
@@ -344,7 +344,9 @@ bool SparseTensor::ToDense(Tensor* out, bool initialize) {
 
   std::vector<int64> strides(dims_);
   const auto& out_shape = out->shape();
-  strides[dims_ - 1] = 1;
+  if (dims_ > 0) {
+    strides[dims_ - 1] = 1;
+  }
   for (int d = dims_ - 2; d >= 0; --d) {
     strides[d] = strides[d + 1] * out_shape.dim_size(d + 1);
   }
diff --git a/tensorflow/core/util/sparse/sparse_tensor_test.cc b/tensorflow/core/util/sparse/sparse_tensor_test.cc
index 0112fc37560..f586c731acf 100644
--- a/tensorflow/core/util/sparse/sparse_tensor_test.cc
+++ b/tensorflow/core/util/sparse/sparse_tensor_test.cc
@@ -602,6 +602,20 @@ TEST(SparseTensorTest, Split) {
   EXPECT_EQ(st_list[1].indices().matrix<int64>()(0, 1), 0);
 }
 
+TEST(SparseTensorTest, Dim0SparseTensorToDenseTensor) {
+  Tensor ix(DT_INT64, TensorShape({1, 0}));
+  Tensor vals(DT_INT32, TensorShape({1}));
+  vals.scalar<int32>()() = 5;
+
+  TensorShape shape({});
+  SparseTensor st(ix, vals, shape);
+
+  Tensor dense(DT_INT32, TensorShape({}));
+  st.ToDense<int32>(&dense);
+
+  EXPECT_EQ(dense.scalar<int32>()(), 5);
+}
+
 }  // namespace
 }  // namespace sparse
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/tensor_slice_reader.cc b/tensorflow/core/util/tensor_slice_reader.cc
index 9ab81af43b1..b40f5e77369 100644
--- a/tensorflow/core/util/tensor_slice_reader.cc
+++ b/tensorflow/core/util/tensor_slice_reader.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/util/tensor_slice_reader.h"
 
+#include <utility>
 #include <vector>
 #include "tensorflow/core/framework/types.pb_text.h"
 #include "tensorflow/core/framework/versions.h"
@@ -107,7 +108,7 @@ TensorSliceReader::TensorSliceReader(const string& filepattern,
 TensorSliceReader::TensorSliceReader(const string& filepattern,
                                      OpenTableFunction open_function,
                                      int preferred_shard)
-    : filepattern_(filepattern), open_function_(open_function) {
+    : filepattern_(filepattern), open_function_(std::move(open_function)) {
   VLOG(1) << "TensorSliceReader for " << filepattern;
   Status s = io::GetMatchingFiles(Env::Default(), filepattern, &fnames_);
   if (!s.ok()) {
diff --git a/tensorflow/core/util/tensor_slice_set.cc b/tensorflow/core/util/tensor_slice_set.cc
index d4b9a4087cc..4217df90ca1 100644
--- a/tensorflow/core/util/tensor_slice_set.cc
+++ b/tensorflow/core/util/tensor_slice_set.cc
@@ -42,7 +42,7 @@ Status TensorSliceSet::Register(const TensorSlice& slice, const string& tag,
     // We check if there is any intersection between this slice and any of the
     // registered slices.
     if (slices_hull_.Overlaps(slice)) {
-      for (const auto x : slices_) {
+      for (const auto& x : slices_) {
         if (slice.Overlaps(x.second.slice)) {
           return errors::Internal("Overlapping slices: existing slice = ",
                                   x.first, ", new slice = ", str);
@@ -89,7 +89,7 @@ bool TensorSliceSet::Query(const TensorSlice& slice, float* data) const {
     int64 overlap_size = 0;
     TensorSlice intersection;
     TensorShape inter_shape;
-    for (const auto x : slices_) {
+    for (const auto& x : slices_) {
       if (slice.Intersect(x.second.slice, &intersection)) {
         s = intersection.SliceTensorShape(shape_, &inter_shape);
         if (!s.ok()) {
@@ -103,7 +103,7 @@ bool TensorSliceSet::Query(const TensorSlice& slice, float* data) const {
       // We have it!
       // Now we need to copy the data to "data"
       if (data) {
-        for (const auto x : slices_) {
+        for (const auto& x : slices_) {
           CopyDataFromTensorSliceToTensorSlice(shape_, x.second.slice, slice,
                                                x.second.data, data);
         }
@@ -146,7 +146,7 @@ bool TensorSliceSet::QueryMeta(
     int64 overlap_size = 0;
     TensorSlice intersection;
     TensorShape inter_shape;
-    for (const auto x : slices_) {
+    for (const auto& x : slices_) {
       if (slice.Intersect(x.second.slice, &intersection)) {
         s = intersection.SliceTensorShape(shape_, &inter_shape);
         if (!s.ok()) {
@@ -180,7 +180,7 @@ Status RegisterTensorSlice(
     tensor_slices->insert(std::make_pair(name, tss));
   } else {
     // Check if the shapes match
-    TensorShape tss_shape(tss->shape());
+    const TensorShape& tss_shape(tss->shape());
     if (!shape.IsSameSize(tss_shape)) {
       return errors::Internal("Incompatible tensor shapes detected for tensor ",
                               name, ": existing = ", tss_shape.DebugString(),
diff --git a/tensorflow/core/util/tensor_slice_writer.cc b/tensorflow/core/util/tensor_slice_writer.cc
index 8907aa65227..928d6fe72c7 100644
--- a/tensorflow/core/util/tensor_slice_writer.cc
+++ b/tensorflow/core/util/tensor_slice_writer.cc
@@ -15,6 +15,8 @@ limitations under the License.
 
 #include "tensorflow/core/util/tensor_slice_writer.h"
 
+#include <utility>
+
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/io/table_builder.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -81,7 +83,7 @@ Status CreateTableTensorSliceBuilder(const string& name,
 TensorSliceWriter::TensorSliceWriter(const string& filename,
                                      CreateBuilderFunction create_builder)
     : filename_(filename),
-      create_builder_(create_builder),
+      create_builder_(std::move(create_builder)),
       tmpname_(strings::StrCat(filename, ".tempstate", random::New64())),
       slices_(0) {
   VersionDef* versions = sts_.mutable_meta()->mutable_versions();
diff --git a/tensorflow/examples/skflow/boston.py b/tensorflow/examples/skflow/boston.py
index 7aacb1b9ff2..10b6305443d 100644
--- a/tensorflow/examples/skflow/boston.py
+++ b/tensorflow/examples/skflow/boston.py
@@ -45,7 +45,8 @@ def main(unused_argv):
   regressor.fit(x_train, y_train, steps=5000, batch_size=1)
 
   # Predict and score
-  y_predicted = regressor.predict(scaler.transform(x_test))
+  y_predicted = list(
+      regressor.predict(scaler.transform(x_test), as_iterable=True))
   score = metrics.mean_squared_error(y_predicted, y_test)
 
   print('MSE: {0:f}'.format(score))
diff --git a/tensorflow/examples/skflow/iris.py b/tensorflow/examples/skflow/iris.py
index 9bd3faa942a..957c91c2b39 100644
--- a/tensorflow/examples/skflow/iris.py
+++ b/tensorflow/examples/skflow/iris.py
@@ -36,7 +36,8 @@ def main(unused_argv):
 
   # Fit and predict.
   classifier.fit(x_train, y_train, steps=200)
-  score = metrics.accuracy_score(y_test, classifier.predict(x_test))
+  predictions = list(classifier.predict(x_test, as_iterable=True))
+  score = metrics.accuracy_score(y_test, predictions)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/iris_custom_decay_dnn.py b/tensorflow/examples/skflow/iris_custom_decay_dnn.py
index 7a34ca9f138..73c526cd4e6 100644
--- a/tensorflow/examples/skflow/iris_custom_decay_dnn.py
+++ b/tensorflow/examples/skflow/iris_custom_decay_dnn.py
@@ -42,7 +42,8 @@ def main(unused_argv):
                                               optimizer=optimizer_exp_decay)
 
   classifier.fit(x_train, y_train, steps=800)
-  score = metrics.accuracy_score(y_test, classifier.predict(x_test))
+  predictions = list(classifier.predict(x_test, as_iterable=True))
+  score = metrics.accuracy_score(y_test, predictions)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/iris_custom_model.py b/tensorflow/examples/skflow/iris_custom_model.py
index 8e2ab2ec882..149ee47fa75 100644
--- a/tensorflow/examples/skflow/iris_custom_model.py
+++ b/tensorflow/examples/skflow/iris_custom_model.py
@@ -59,8 +59,9 @@ def main(unused_argv):
   classifier = learn.Estimator(model_fn=my_model)
   classifier.fit(x_train, y_train, steps=1000)
 
-  y_predicted = classifier.predict(x_test)
-  score = metrics.accuracy_score(y_test, y_predicted['class'])
+  y_predicted = [
+      p['class'] for p in classifier.predict(x_test, as_iterable=True)]
+  score = metrics.accuracy_score(y_test, y_predicted)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/iris_run_config.py b/tensorflow/examples/skflow/iris_run_config.py
index 6ca563e9a32..b7b8b5cd01f 100644
--- a/tensorflow/examples/skflow/iris_run_config.py
+++ b/tensorflow/examples/skflow/iris_run_config.py
@@ -46,7 +46,8 @@ def main(unused_argv):
 
   # Fit and predict.
   classifier.fit(x_train, y_train, steps=200)
-  score = metrics.accuracy_score(y_test, classifier.predict(x_test))
+  predictions = list(classifier.predict(x_test, as_iterable=True))
+  score = metrics.accuracy_score(y_test, predictions)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/iris_save_restore.py b/tensorflow/examples/skflow/iris_save_restore.py
index 8955f92b3af..e217b0604d6 100644
--- a/tensorflow/examples/skflow/iris_save_restore.py
+++ b/tensorflow/examples/skflow/iris_save_restore.py
@@ -33,7 +33,8 @@ classifier = learn.LinearClassifier(
     feature_columns=learn.infer_real_valued_columns_from_input(x_train),
     n_classes=3)
 classifier.fit(x_train, y_train, steps=200)
-score = metrics.accuracy_score(y_test, classifier.predict(x_test))
+predictions = list(classifier.predict(x_test, as_iterable=True))
+score = metrics.accuracy_score(y_test, predictions)
 print('Accuracy: {0:f}'.format(score))
 
 # Clean checkpoint folder if exists
diff --git a/tensorflow/examples/skflow/iris_val_based_early_stopping.py b/tensorflow/examples/skflow/iris_val_based_early_stopping.py
index 63a1c2b3c63..3d0129c735b 100644
--- a/tensorflow/examples/skflow/iris_val_based_early_stopping.py
+++ b/tensorflow/examples/skflow/iris_val_based_early_stopping.py
@@ -54,7 +54,8 @@ def main(unused_argv):
       feature_columns=learn.infer_real_valued_columns_from_input(x_train),
       hidden_units=[10, 20, 10], n_classes=3, model_dir=model_dir)
   classifier1.fit(x=x_train, y=y_train, steps=2000)
-  score1 = metrics.accuracy_score(y_test, classifier1.predict(x_test))
+  predictions1 = list(classifier1.predict(x_test, as_iterable=True))
+  score1 = metrics.accuracy_score(y_test, predictions1)
 
   model_dir = '/tmp/iris_model_val'
   clean_folder(model_dir)
@@ -66,7 +67,8 @@ def main(unused_argv):
       hidden_units=[10, 20, 10], n_classes=3, model_dir=model_dir,
       config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
   classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
-  score2 = metrics.accuracy_score(y_test, classifier2.predict(x_test))
+  predictions2 = list(classifier2.predict(x_test, as_iterable=True))
+  score2 = metrics.accuracy_score(y_test, predictions2)
 
   # In many applications, the score is improved by using early stopping
   print('score1: ', score1)
diff --git a/tensorflow/examples/skflow/text_classification.py b/tensorflow/examples/skflow/text_classification.py
index 3d34617016c..5d6b93e18fa 100644
--- a/tensorflow/examples/skflow/text_classification.py
+++ b/tensorflow/examples/skflow/text_classification.py
@@ -101,8 +101,9 @@ def main(unused_argv):
 
   # Train and predict
   classifier.fit(x_train, y_train, steps=100)
-  y_predicted = classifier.predict(x_test)
-  score = metrics.accuracy_score(y_test, y_predicted['class'])
+  y_predicted = [
+      p['class'] for p in classifier.predict(x_test, as_iterable=True)]
+  score = metrics.accuracy_score(y_test, y_predicted)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/text_classification_character_cnn.py b/tensorflow/examples/skflow/text_classification_character_cnn.py
index be627f316e5..adcb29e76e8 100644
--- a/tensorflow/examples/skflow/text_classification_character_cnn.py
+++ b/tensorflow/examples/skflow/text_classification_character_cnn.py
@@ -99,8 +99,9 @@ def main(unused_argv):
 
   # Train and predict
   classifier.fit(x_train, y_train, steps=100)
-  y_predicted = classifier.predict(x_test)
-  score = metrics.accuracy_score(y_test, y_predicted['class'])
+  y_predicted = [
+      p['class'] for p in classifier.predict(x_test, as_iterable=True)]
+  score = metrics.accuracy_score(y_test, y_predicted)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/text_classification_character_rnn.py b/tensorflow/examples/skflow/text_classification_character_rnn.py
index 864f678d4e4..736c13af0bd 100644
--- a/tensorflow/examples/skflow/text_classification_character_rnn.py
+++ b/tensorflow/examples/skflow/text_classification_character_rnn.py
@@ -80,8 +80,9 @@ def main(unused_argv):
 
   # Train and predict
   classifier.fit(x_train, y_train, steps=100)
-  y_predicted = classifier.predict(x_test)
-  score = metrics.accuracy_score(y_test, y_predicted['class'])
+  y_predicted = [
+      p['class'] for p in classifier.predict(x_test, as_iterable=True)]
+  score = metrics.accuracy_score(y_test, y_predicted)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/skflow/text_classification_cnn.py b/tensorflow/examples/skflow/text_classification_cnn.py
index 46238d2f037..ae42d95ac29 100644
--- a/tensorflow/examples/skflow/text_classification_cnn.py
+++ b/tensorflow/examples/skflow/text_classification_cnn.py
@@ -100,8 +100,9 @@ def main(unused_argv):
 
   # Train and predict
   classifier.fit(x_train, y_train, steps=100)
-  y_predicted = classifier.predict(x_test)
-  score = metrics.accuracy_score(y_test, y_predicted['class'])
+  y_predicted = [
+      p['class'] for p in classifier.predict(x_test, as_iterable=True)]
+  score = metrics.accuracy_score(y_test, y_predicted)
   print('Accuracy: {0:f}'.format(score))
 
 
diff --git a/tensorflow/examples/tutorials/monitors/iris_monitors.py b/tensorflow/examples/tutorials/monitors/iris_monitors.py
index a9aa0d4ce09..b6448315c55 100644
--- a/tensorflow/examples/tutorials/monitors/iris_monitors.py
+++ b/tensorflow/examples/tutorials/monitors/iris_monitors.py
@@ -70,5 +70,5 @@ print("Accuracy: {0:f}".format(accuracy_score))
 # Classify two new flower samples.
 new_samples = np.array(
     [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float)
-y = classifier.predict(new_samples)
+y = list(classifier.predict(new_samples, as_iterable=True))
 print("Predictions: {}".format(str(y)))
diff --git a/tensorflow/g3doc/api_docs/python/constant_op.md b/tensorflow/g3doc/api_docs/python/constant_op.md
index d8e2747edc8..7c999d72d57 100644
--- a/tensorflow/g3doc/api_docs/python/constant_op.md
+++ b/tensorflow/g3doc/api_docs/python/constant_op.md
@@ -278,13 +278,15 @@ tf.range(limit) ==> [0, 1, 2, 3, 4]
 ##### Args:
 
 
-*  <b>`start`</b>: A 0-D (scalar) of type `int32`. First entry in sequence.
-    Defaults to 0.
+*  <b>`start`</b>: A 0-D (scalar) of type `int32`. Acts as first entry in the range if
+    `limit` is not None; otherwise, acts as range limit and first entry
+    defaults to 0.
 *  <b>`limit`</b>: A 0-D (scalar) of type `int32`. Upper limit of sequence,
-    exclusive.
-*  <b>`delta`</b>: A 0-D `Tensor` (scalar) of type `int32`. Optional. Default is 1.
-    Number that increments `start`.
-*  <b>`name`</b>: A name for the operation (optional).
+    exclusive. If None, defaults to the value of `start` while the first
+    entry of the range defaults to 0.
+*  <b>`delta`</b>: A 0-D `Tensor` (scalar) of type `int32`. Number that increments
+    `start`. Defaults to 1.
+*  <b>`name`</b>: A name for the operation. Defaults to "range".
 
 ##### Returns:
 
diff --git a/tensorflow/g3doc/api_docs/python/contrib.bayesflow.stochastic_tensor.md b/tensorflow/g3doc/api_docs/python/contrib.bayesflow.stochastic_tensor.md
index b7a9a803d4b..db0d4d00179 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.bayesflow.stochastic_tensor.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.bayesflow.stochastic_tensor.md
@@ -1451,6 +1451,96 @@ in a `stop_gradients` call to disable any possible backpropagation.
 
 
 
+- - -
+
+### `class tf.contrib.bayesflow.stochastic_tensor.MixtureTensor` {#MixtureTensor}
+
+`MixtureTensor` is a `StochasticTensor` backed by the distribution `Mixture`.
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.__init__(name=None, dist_value_type=None, loss_fn=score_function, **dist_args)` {#MixtureTensor.__init__}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.clone(name=None, **dist_args)` {#MixtureTensor.clone}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.distribution` {#MixtureTensor.distribution}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.dtype` {#MixtureTensor.dtype}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.entropy(name='entropy')` {#MixtureTensor.entropy}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.graph` {#MixtureTensor.graph}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.input_dict` {#MixtureTensor.input_dict}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.loss(final_loss, name='Loss')` {#MixtureTensor.loss}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.mean(name='mean')` {#MixtureTensor.mean}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.name` {#MixtureTensor.name}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.value(name='value')` {#MixtureTensor.value}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.value_type` {#MixtureTensor.value_type}
+
+
+
+
+
 - - -
 
 ### `class tf.contrib.bayesflow.stochastic_tensor.MultinomialTensor` {#MultinomialTensor}
diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
index 75478dc925d..a96badda2b6 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md
@@ -195,6 +195,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -344,6 +350,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -424,6 +440,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Distribution.log_survival_function(value, name='log_survival_function')` {#Distribution.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Distribution.mean(name='mean')` {#Distribution.mean}
@@ -615,6 +660,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Distribution.survival_function(value, name='survival_function')` {#Distribution.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Distribution.validate_args` {#Distribution.validate_args}
@@ -776,6 +847,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -925,6 +1002,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -1005,6 +1092,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Binomial.log_survival_function(value, name='log_survival_function')` {#Binomial.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Binomial.logits` {#Binomial.logits}
@@ -1217,6 +1333,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Binomial.survival_function(value, name='survival_function')` {#Binomial.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Binomial.validate_args` {#Binomial.validate_args}
@@ -1318,6 +1460,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -1467,6 +1615,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -1547,6 +1705,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Bernoulli.log_survival_function(value, name='log_survival_function')` {#Bernoulli.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Bernoulli.logits` {#Bernoulli.logits}
@@ -1759,6 +1946,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Bernoulli.survival_function(value, name='survival_function')` {#Bernoulli.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Bernoulli.validate_args` {#Bernoulli.validate_args}
@@ -1943,6 +2156,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -2092,6 +2311,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -2172,6 +2401,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Beta.log_survival_function(value, name='log_survival_function')` {#Beta.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Beta.mean(name='mean')` {#Beta.mean}
@@ -2363,6 +2621,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Beta.survival_function(value, name='survival_function')` {#Beta.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Beta.validate_args` {#Beta.validate_args}
@@ -2455,6 +2739,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -2604,6 +2894,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -2684,6 +2984,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Categorical.log_survival_function(value, name='log_survival_function')` {#Categorical.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Categorical.logits` {#Categorical.logits}
@@ -2889,6 +3218,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Categorical.survival_function(value, name='survival_function')` {#Categorical.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Categorical.validate_args` {#Categorical.validate_args}
@@ -2998,6 +3353,12 @@ Inverse scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -3154,6 +3515,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -3234,6 +3605,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2.log_survival_function(value, name='log_survival_function')` {#Chi2.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2.mean(name='mean')` {#Chi2.mean}
@@ -3425,6 +3825,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2.survival_function(value, name='survival_function')` {#Chi2.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2.validate_args` {#Chi2.validate_args}
@@ -3534,6 +3960,12 @@ Inverse scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -3690,6 +4122,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -3770,6 +4212,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Exponential.log_survival_function(value, name='log_survival_function')` {#Exponential.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Exponential.mean(name='mean')` {#Exponential.mean}
@@ -3961,6 +4432,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Exponential.survival_function(value, name='survival_function')` {#Exponential.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Exponential.validate_args` {#Exponential.validate_args}
@@ -4097,6 +4594,12 @@ Inverse scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -4246,6 +4749,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -4326,6 +4839,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Gamma.log_survival_function(value, name='log_survival_function')` {#Gamma.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Gamma.mean(name='mean')` {#Gamma.mean}
@@ -4517,6 +5059,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Gamma.survival_function(value, name='survival_function')` {#Gamma.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Gamma.validate_args` {#Gamma.validate_args}
@@ -4649,6 +5217,12 @@ Scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -4798,6 +5372,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -4878,6 +5462,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGamma.log_survival_function(value, name='log_survival_function')` {#InverseGamma.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGamma.mean(name='mean')` {#InverseGamma.mean}
@@ -5069,6 +5682,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGamma.survival_function(value, name='survival_function')` {#InverseGamma.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGamma.validate_args` {#InverseGamma.validate_args}
@@ -5176,6 +5815,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -5332,6 +5977,16 @@ Distribution parameter for the location.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -5412,6 +6067,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Laplace.log_survival_function(value, name='log_survival_function')` {#Laplace.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Laplace.mean(name='mean')` {#Laplace.mean}
@@ -5610,6 +6294,32 @@ Distribution parameter for scale.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Laplace.survival_function(value, name='survival_function')` {#Laplace.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Laplace.validate_args` {#Laplace.validate_args}
@@ -5747,6 +6457,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -5896,6 +6612,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -5976,6 +6702,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Normal.log_survival_function(value, name='log_survival_function')` {#Normal.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Normal.mean(name='mean')` {#Normal.mean}
@@ -6181,6 +6936,32 @@ Distribution parameter for standard deviation.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Normal.survival_function(value, name='survival_function')` {#Normal.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Normal.validate_args` {#Normal.validate_args}
@@ -6279,6 +7060,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -6435,6 +7222,16 @@ Rate parameter.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -6515,6 +7312,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Poisson.log_survival_function(value, name='log_survival_function')` {#Poisson.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Poisson.mean(name='mean')` {#Poisson.mean}
@@ -6706,6 +7532,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Poisson.survival_function(value, name='survival_function')` {#Poisson.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Poisson.validate_args` {#Poisson.validate_args}
@@ -6852,6 +7704,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -7008,6 +7866,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -7088,6 +7956,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentT.log_survival_function(value, name='log_survival_function')` {#StudentT.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentT.mean(name='mean')` {#StudentT.mean}
@@ -7293,6 +8190,32 @@ Scaling factors of these Student's t distribution(s).
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentT.survival_function(value, name='survival_function')` {#StudentT.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentT.validate_args` {#StudentT.validate_args}
@@ -7422,6 +8345,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -7571,6 +8500,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -7651,6 +8590,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Uniform.log_survival_function(value, name='log_survival_function')` {#Uniform.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Uniform.mean(name='mean')` {#Uniform.mean}
@@ -7849,6 +8817,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Uniform.survival_function(value, name='survival_function')` {#Uniform.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Uniform.validate_args` {#Uniform.validate_args}
@@ -7997,6 +8991,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -8146,6 +9146,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -8233,6 +9243,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiag.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalDiag.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiag.mean(name='mean')` {#MultivariateNormalDiag.mean}
@@ -8445,6 +9484,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiag.survival_function(value, name='survival_function')` {#MultivariateNormalDiag.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiag.validate_args` {#MultivariateNormalDiag.validate_args}
@@ -8580,6 +9645,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -8729,6 +9800,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -8816,6 +9897,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalFull.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalFull.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalFull.mean(name='mean')` {#MultivariateNormalFull.mean}
@@ -9028,6 +10138,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalFull.survival_function(value, name='survival_function')` {#MultivariateNormalFull.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalFull.validate_args` {#MultivariateNormalFull.validate_args}
@@ -9172,6 +10308,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -9321,6 +10463,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -9408,6 +10560,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalCholesky.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalCholesky.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalCholesky.mean(name='mean')` {#MultivariateNormalCholesky.mean}
@@ -9620,6 +10801,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalCholesky.survival_function(value, name='survival_function')` {#MultivariateNormalCholesky.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalCholesky.validate_args` {#MultivariateNormalCholesky.validate_args}
@@ -9854,6 +11061,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -10003,6 +11216,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -10083,6 +11306,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Dirichlet.log_survival_function(value, name='log_survival_function')` {#Dirichlet.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Dirichlet.mean(name='mean')` {#Dirichlet.mean}
@@ -10274,6 +11526,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Dirichlet.survival_function(value, name='survival_function')` {#Dirichlet.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Dirichlet.validate_args` {#Dirichlet.validate_args}
@@ -10461,6 +11739,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -10610,6 +11894,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -10690,6 +11984,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.DirichletMultinomial.log_survival_function(value, name='log_survival_function')` {#DirichletMultinomial.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.DirichletMultinomial.mean(name='mean')` {#DirichletMultinomial.mean}
@@ -10888,6 +12211,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.DirichletMultinomial.survival_function(value, name='survival_function')` {#DirichletMultinomial.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.DirichletMultinomial.validate_args` {#DirichletMultinomial.validate_args}
@@ -11057,6 +12406,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -11206,6 +12561,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -11286,6 +12651,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Multinomial.log_survival_function(value, name='log_survival_function')` {#Multinomial.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Multinomial.logits` {#Multinomial.logits}
@@ -11498,6 +12892,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Multinomial.survival_function(value, name='survival_function')` {#Multinomial.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Multinomial.validate_args` {#Multinomial.validate_args}
@@ -11651,6 +13071,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -11821,6 +13247,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -11908,6 +13344,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartCholesky.log_survival_function(value, name='log_survival_function')` {#WishartCholesky.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartCholesky.mean(name='mean')` {#WishartCholesky.mean}
@@ -12120,6 +13585,32 @@ Wishart distribution scale matrix as an OperatorPD.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartCholesky.survival_function(value, name='survival_function')` {#WishartCholesky.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartCholesky.validate_args` {#WishartCholesky.validate_args}
@@ -12269,6 +13760,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -12439,6 +13936,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -12526,6 +14033,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartFull.log_survival_function(value, name='log_survival_function')` {#WishartFull.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartFull.mean(name='mean')` {#WishartFull.mean}
@@ -12738,6 +14274,32 @@ Wishart distribution scale matrix as an OperatorPD.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartFull.survival_function(value, name='survival_function')` {#WishartFull.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartFull.validate_args` {#WishartFull.validate_args}
@@ -12876,6 +14438,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -13032,6 +14600,16 @@ Inverse function of transform, y => x.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -13119,6 +14697,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.TransformedDistribution.log_survival_function(value, name='log_survival_function')` {#TransformedDistribution.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.TransformedDistribution.mean(name='mean')` {#TransformedDistribution.mean}
@@ -13310,6 +14917,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.TransformedDistribution.survival_function(value, name='survival_function')` {#TransformedDistribution.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.TransformedDistribution.transform` {#TransformedDistribution.transform}
@@ -13333,6 +14966,719 @@ Variance.
 
 
 
+### Mixture Models
+
+- - -
+
+### `class tf.contrib.distributions.Mixture` {#Mixture}
+
+Mixture distribution.
+
+The `Mixture` object implements batched mixture distributions.
+The mixture model is defined by a `Categorical` distribution (the mixture)
+and a python list of `Distribution` objects.
+
+Methods supported include `log_prob`, `prob`, `mean`, `sample`, and
+`entropy_lower_bound`.
+- - -
+
+#### `tf.contrib.distributions.Mixture.__init__(cat, components, validate_args=True, allow_nan_stats=False, name='Mixture')` {#Mixture.__init__}
+
+Initialize a Mixture distribution.
+
+A `Mixture` is defined by a `Categorical` (`cat`, representing the
+mixture probabilities) and a list of `Distribution` objects
+all having matching dtype, batch shape, event shape, and continuity
+properties (the components).
+
+The user does not pass the list of distributions directly, but rather a
+list of `(constructor, batch_tensor_params_dict)` pairs,
+called `components`. The list of distributions is created via:
+
+```python
+distributions = [
+  c(**params_dict) for (c, params_dict) in components
+]
+```
+
+This form allows for certain types of batch-shape optimizations within
+this class.
+
+An example of `components`:
+
+```python
+components = [
+  (tf.contrib.distributions.Normal, {"mu": 3.0, "sigma": 1.0}),
+  (functools.partial(tf.contrib.distributions.Normal, validate_args=False),
+   {"mu": 3.0, "sigma": 2.0}),
+  (tf.contrib.distributions.Normal.from_params,
+   {"mu": 1.0, "sigma": -1.0})
+]
+```
+
+The `num_classes` of `cat` must be possible to infer at graph construction
+time and match `len(distributions)`.
+
+##### Args:
+
+
+*  <b>`cat`</b>: A `Categorical` distribution instance, representing the probabilities
+      of `distributions`.
+*  <b>`components`</b>: A list or tuple of `(constructor, batch_tensor_params)`
+    tuples.  The `constructor` must be a callable, and `batch_tensor_params`
+    must be a dict mapping constructor kwargs to batchwise parameters.
+    Each `Distribution` instance created by calling
+    `constructor(**batch_tensor_params)` must have the same type, be defined
+    on the same domain, and have matching `event_shape` and `batch_shape`.
+*  <b>`validate_args`</b>: Boolean, default `True`.  If `True`, raise a runtime error
+    if batch or event ranks are inconsistent between cat and any of the
+    distributions.  This is only checked if the ranks cannot be determined
+    statically at graph construction time.
+*  <b>`allow_nan_stats`</b>: Boolean, default `False`.  If `False`, raise an
+   exception if a statistic (e.g. mean/mode/etc...) is undefined for any
+    batch member.  If `True`, batch members with valid parameters leading to
+    undefined statistics will return NaN for this statistic.
+*  <b>`name`</b>: A name for this distribution (optional).
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: If cat is not a `Categorical`, or `components` is not
+    a list or tuple, or the elements of `components` are not
+    tuples of the form `(callable, dict)`, or the objects resulting
+    from calling `callable(**dict)` are not instances of `Distribution`, or
+    the resulting instances of `Distribution` do not have matching
+    continuity properties, or do not have matching `dtype`.
+*  <b>`ValueError`</b>: If `components` is an empty list or tuple, or the
+    distributions created from `components` do not have a statically known event
+    rank.  If `cat.num_classes` cannot be inferred at graph creation time,
+    or the constant value of `cat.num_classes` is not equal to
+    `len(distributions)`, or all `distributions` and `cat` do not have
+    matching static batch shapes, or all components' distributions do not
+    have matching static event shapes.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.allow_nan_stats` {#Mixture.allow_nan_stats}
+
+Python boolean describing behavior when a stat is undefined.
+
+Stats return +/- infinity when it makes sense.  E.g., the variance
+of a Cauchy distribution is infinity.  However, sometimes the
+statistic is undefined, e.g., if a distribution's pdf does not achieve a
+maximum within the support of the distribution, the mode is undefined.
+If the mean is undefined, then by definition the variance is undefined.
+E.g. the mean for Student's T for df = 1 is undefined (no clear way to say
+it is either + or - infinity), so the variance = E[(X - mean)^2] is also
+undefined.
+
+##### Returns:
+
+
+*  <b>`allow_nan_stats`</b>: Python boolean.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.batch_shape(name='batch_shape')` {#Mixture.batch_shape}
+
+Shape of a single sample from a single event index as a 1-D `Tensor`.
+
+The product of the dimensions of the `batch_shape` is the number of
+independent distributions of this kind the instance represents.
+
+##### Args:
+
+
+*  <b>`name`</b>: name to give to the op
+
+##### Returns:
+
+
+*  <b>`batch_shape`</b>: `Tensor`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.cat` {#Mixture.cat}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.cdf(value, name='cdf')` {#Mixture.cdf}
+
+Cumulative distribution function.
+
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`cdf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.distributions` {#Mixture.distributions}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.dtype` {#Mixture.dtype}
+
+The `DType` of `Tensor`s handled by this `Distribution`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.entropy(name='entropy')` {#Mixture.entropy}
+
+Shannon entropy in nats.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.entropy_lower_bound(name='entropy_lower_bound')` {#Mixture.entropy_lower_bound}
+
+A lower bound on the entropy of this mixture model.
+
+The bound below is not always very tight, and its usefulness depends
+on the mixture probabilities and the distributions in use.
+
+A lower bound is useful for ELBO when the `Mixture` is the variational
+distribution:
+
+\\(
+\log p(x) >= ELBO = \int q(z) \log p(x, z) dz + H[q]
+\\)
+
+where \\( p \\) is the prior distribution, \\( q \\) is the variational,
+and \\( H[q] \\) is the entropy of \\( q \\).  If there is a lower bound
+\\( G[q] \\) such that \\( H[q] \geq G[q] \\) then it can be used in
+place of \\( H[q] \\).
+
+For a mixture of distributions \\( q(Z) = \sum_i c_i q_i(Z) \\) with
+\\( \sum_i c_i = 1 \\), by the concavity of \\( f(x) = -x \log x \\), a
+simple lower bound is:
+
+\\(
+\begin{align}
+H[q] & = - \int q(z) \log q(z) dz \\\
+   & = - \int (\sum_i c_i q_i(z)) \log(\sum_i c_i q_i(z)) dz \\\
+   & \geq - \sum_i c_i \int q_i(z) \log q_i(z) dz \\\
+   & = \sum_i c_i H[q_i]
+\end{align}
+\\)
+
+This is the term we calculate below for \\( G[q] \\).
+
+##### Args:
+
+
+*  <b>`name`</b>: A name for this operation (optional).
+
+##### Returns:
+
+  A lower bound on the Mixture's entropy.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.event_shape(name='event_shape')` {#Mixture.event_shape}
+
+Shape of a single sample from a single batch as a 1-D int32 `Tensor`.
+
+##### Args:
+
+
+*  <b>`name`</b>: name to give to the op
+
+##### Returns:
+
+
+*  <b>`event_shape`</b>: `Tensor`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.from_params(cls, make_safe=True, **kwargs)` {#Mixture.from_params}
+
+Given (unconstrained) parameters, return an instantiated distribution.
+
+Subclasses should implement a static method `_safe_transforms` that returns
+a dict of parameter transforms, which will be used if `make_safe = True`.
+
+Example usage:
+
+```
+# Let's say we want a sample of size (batch_size, 10)
+shapes = MultiVariateNormalDiag.param_shapes([batch_size, 10])
+
+# shapes has a Tensor shape for mu and sigma
+# shapes == {
+#   "mu": tf.constant([batch_size, 10]),
+#   "sigma": tf.constant([batch_size, 10]),
+# }
+
+# Here we parameterize mu and sigma with the output of a linear
+# layer. Note that sigma is unconstrained.
+params = {}
+for name, shape in shapes.items():
+  params[name] = linear(x, shape[1])
+
+# Note that you can forward other kwargs to the `Distribution`, like
+# `allow_nan_stats` or `name`.
+mvn = MultiVariateNormalDiag.from_params(**params, allow_nan_stats=True)
+```
+
+Distribution parameters may have constraints (e.g. `sigma` must be positive
+for a `Normal` distribution) and the `from_params` method will apply default
+parameter transforms. If a user wants to use their own transform, they can
+apply it externally and set `make_safe=False`.
+
+##### Args:
+
+
+*  <b>`make_safe`</b>: Whether the `params` should be constrained. If True,
+    `from_params` will apply default parameter transforms. If False, no
+    parameter transforms will be applied.
+*  <b>`**kwargs`</b>: dict of parameters for the distribution.
+
+##### Returns:
+
+  A distribution parameterized by possibly transformed parameters in
+  `kwargs`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `make_safe` is `True` but `_safe_transforms` is not
+    implemented directly for `cls`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.get_batch_shape()` {#Mixture.get_batch_shape}
+
+Shape of a single sample from a single event index as a `TensorShape`.
+
+Same meaning as `batch_shape`. May be only partially defined.
+
+##### Returns:
+
+
+*  <b>`batch_shape`</b>: `TensorShape`, possibly unknown.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.get_event_shape()` {#Mixture.get_event_shape}
+
+Shape of a single sample from a single batch as a `TensorShape`.
+
+Same meaning as `event_shape`. May be only partially defined.
+
+##### Returns:
+
+
+*  <b>`event_shape`</b>: `TensorShape`, possibly unknown.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.is_continuous` {#Mixture.is_continuous}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.is_reparameterized` {#Mixture.is_reparameterized}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_cdf(value, name='log_cdf')` {#Mixture.log_cdf}
+
+Log cumulative distribution function.
+
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`logcdf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_pdf(value, name='log_pdf')` {#Mixture.log_pdf}
+
+Log probability density function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`log_prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if not `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_pmf(value, name='log_pmf')` {#Mixture.log_pmf}
+
+Log probability mass function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`log_pmf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_prob(value, name='log_prob')` {#Mixture.log_prob}
+
+Log probability density/mass function (depending on `is_continuous`).
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`log_prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_survival_function(value, name='log_survival_function')` {#Mixture.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.mean(name='mean')` {#Mixture.mean}
+
+Mean.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.mode(name='mode')` {#Mixture.mode}
+
+Mode.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.name` {#Mixture.name}
+
+Name prepended to all ops created by this `Distribution`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.num_components` {#Mixture.num_components}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.param_shapes(cls, sample_shape, name='DistributionParamShapes')` {#Mixture.param_shapes}
+
+Shapes of parameters given the desired shape of a call to `sample()`.
+
+Subclasses should override static method `_param_shapes`.
+
+##### Args:
+
+
+*  <b>`sample_shape`</b>: `Tensor` or python list/tuple. Desired shape of a call to
+    `sample()`.
+*  <b>`name`</b>: name to prepend ops with.
+
+##### Returns:
+
+  `dict` of parameter name to `Tensor` shapes.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.param_static_shapes(cls, sample_shape)` {#Mixture.param_static_shapes}
+
+param_shapes with static (i.e. TensorShape) shapes.
+
+##### Args:
+
+
+*  <b>`sample_shape`</b>: `TensorShape` or python list/tuple. Desired shape of a call
+    to `sample()`.
+
+##### Returns:
+
+  `dict` of parameter name to `TensorShape`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `sample_shape` is a `TensorShape` and is not fully defined.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.parameters` {#Mixture.parameters}
+
+Dictionary of parameters used by this `Distribution`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.pdf(value, name='pdf')` {#Mixture.pdf}
+
+Probability density function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if not `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.pmf(value, name='pmf')` {#Mixture.pmf}
+
+Probability mass function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`pmf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.prob(value, name='prob')` {#Mixture.prob}
+
+Probability density/mass function (depending on `is_continuous`).
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.sample(sample_shape=(), seed=None, name='sample')` {#Mixture.sample}
+
+Generate samples of the specified shape.
+
+Note that a call to `sample()` without arguments will generate a single
+sample.
+
+##### Args:
+
+
+*  <b>`sample_shape`</b>: 0D or 1D `int32` `Tensor`. Shape of the generated samples.
+*  <b>`seed`</b>: Python integer seed for RNG
+*  <b>`name`</b>: name to give to the op.
+
+##### Returns:
+
+
+*  <b>`samples`</b>: a `Tensor` with prepended dimensions `sample_shape`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.sample_n(n, seed=None, name='sample_n')` {#Mixture.sample_n}
+
+Generate `n` samples.
+
+##### Args:
+
+
+*  <b>`n`</b>: `Scalar` `Tensor` of type `int32` or `int64`, the number of
+    observations to sample.
+*  <b>`seed`</b>: Python integer seed for RNG
+*  <b>`name`</b>: name to give to the op.
+
+##### Returns:
+
+
+*  <b>`samples`</b>: a `Tensor` with a prepended dimension (n,).
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `n` is not an integer type.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.std(name='std')` {#Mixture.std}
+
+Standard deviation.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.survival_function(value, name='survival_function')` {#Mixture.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.validate_args` {#Mixture.validate_args}
+
+Python boolean indicating whether possibly expensive checks are enabled.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.variance(name='variance')` {#Mixture.variance}
+
+Variance.
+
+
+
+
 ## Posterior inference with conjugate priors.
 
 Functions that transform conjugate prior/likelihood pairs to distributions
@@ -13722,6 +16068,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -13871,6 +16223,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -13958,6 +16320,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalDiagPlusVDVT.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.mean(name='mean')` {#MultivariateNormalDiagPlusVDVT.mean}
@@ -14170,6 +16561,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.survival_function(value, name='survival_function')` {#MultivariateNormalDiagPlusVDVT.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.validate_args` {#MultivariateNormalDiagPlusVDVT.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.md b/tensorflow/g3doc/api_docs/python/contrib.learn.md
index 47b5e7e5944..f5de19ae3e1 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.learn.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.learn.md
@@ -66,7 +66,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -269,7 +269,7 @@ component of a nested object.
 Estimator class is the basic TensorFlow model trainer/evaluator.
 - - -
 
-#### `tf.contrib.learn.Estimator.__init__(model_fn=None, model_dir=None, config=None, params=None, weight_column_name=None)` {#Estimator.__init__}
+#### `tf.contrib.learn.Estimator.__init__(model_fn=None, model_dir=None, config=None, params=None)` {#Estimator.__init__}
 
 Constructs an Estimator instance.
 
@@ -284,7 +284,7 @@ Constructs an Estimator instance.
       * `(features, targets, mode) -> (predictions, loss, train_op)`
       * `(features, targets, mode, params) -> (predictions, loss, train_op)`
 
-  Where
+    Where
 
       * `features` are single `Tensor` or `dict` of `Tensor`s
              (depending on data passed to `fit`),
@@ -306,9 +306,6 @@ Constructs an Estimator instance.
 *  <b>`config`</b>: Configuration object.
 *  <b>`params`</b>: `dict` of hyper parameters that will be passed into `model_fn`.
           Keys are names of parameters, values are basic python types.
-*  <b>`weight_column_name`</b>: A string defining feature column name representing
-    weights. It is used to down weight or boost examples during training. It
-    will be multiplied by the loss of the example.
 
 ##### Raises:
 
@@ -345,7 +342,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -610,8 +607,6 @@ Input of `fit` and `evaluate` should have following features,
     Both features' `value` must be a `SparseTensor`.
   - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
     whose `value` is a `Tensor`.
-  - if `feature_columns` is `None`, then `input` must contain only real
-    valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.DNNClassifier.__init__(hidden_units, feature_columns, model_dir=None, n_classes=2, weight_column_name=None, optimizer=None, activation_fn=relu, dropout=None, gradient_clip_norm=None, enable_centered_bias=True, config=None)` {#DNNClassifier.__init__}
@@ -704,7 +699,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -994,8 +989,6 @@ Input of `fit` and `evaluate` should have following features,
     Both features' `value` must be a `SparseTensor`.
   - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
     whose `value` is a `Tensor`.
-  - if `feature_columns` is `None`, then `input` must contain only real
-    valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.DNNRegressor.__init__(hidden_units, feature_columns, model_dir=None, weight_column_name=None, optimizer=None, activation_fn=relu, dropout=None, gradient_clip_norm=None, enable_centered_bias=True, config=None)` {#DNNRegressor.__init__}
@@ -1086,7 +1079,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -1365,7 +1358,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -1628,7 +1621,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -1919,7 +1912,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -2249,8 +2242,6 @@ Input of `fit` and `evaluate` should have following features,
     Both features' `value` must be a `SparseTensor`.
   - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
     whose `value` is a `Tensor`.
-  - if `feature_columns` is `None`, then `input` must contains only real
-    valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.LinearClassifier.__init__(feature_columns, model_dir=None, n_classes=2, weight_column_name=None, optimizer=None, gradient_clip_norm=None, enable_centered_bias=True, config=None)` {#LinearClassifier.__init__}
@@ -2408,8 +2399,6 @@ Input of `fit` and `evaluate` should have following features,
        key=weight column name, value=a `SparseTensor`}
   - if isinstance(column, `RealValuedColumn`):
       key=column.name, value=a `Tensor`
-  - if `feature_columns` is `None`:
-      input must contains only real valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.LinearRegressor.__init__(feature_columns, model_dir=None, weight_column_name=None, optimizer=None, gradient_clip_norm=None, enable_centered_bias=True, target_dimension=1, config=None)` {#LinearRegressor.__init__}
@@ -2494,7 +2483,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -2815,7 +2804,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
@@ -3194,7 +3183,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/contrib.losses.md b/tensorflow/g3doc/api_docs/python/contrib.losses.md
index 5cc44c9aea1..fae21fc3cc6 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.losses.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.losses.md
@@ -348,8 +348,40 @@ If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
 ##### Raises:
 
 
-*  <b>`ValueError`</b>: If the shape of `predictions` doesn't match that of `targets` or
-    if the shape of `weight` is invalid or if `weight` is None.
+*  <b>`ValueError`</b>: If the shape of `logits` doesn't match that of `onehot_labels`
+    or if the shape of `weight` is invalid or if `weight` is None.
+
+
+- - -
+
+### `tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels, weight=1.0, scope=None)` {#sparse_softmax_cross_entropy}
+
+Cross-entropy loss using tf.nn.sparse_softmax_cross_entropy_with_logits.
+
+`weight` acts as a coefficient for the loss. If a scalar is provided,
+then the loss is simply scaled by the given value. If `weight` is a
+tensor of size [`batch_size`], then the loss weights apply to each
+corresponding sample.
+
+##### Args:
+
+
+*  <b>`logits`</b>: [batch_size, num_classes] logits outputs of the network.
+*  <b>`labels`</b>: [batch_size, 1] or [batch_size] target labels of dtype `int32` or
+    `int64` in the range `[0, num_classes)`.
+*  <b>`weight`</b>: Coefficients for the loss. The tensor must be a scalar or a tensor
+    of shape [batch_size] or [batch_size, 1].
+*  <b>`scope`</b>: the scope for the operations performed in computing the loss.
+
+##### Returns:
+
+  A scalar `Tensor` representing the loss value.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If the shapes of logits, labels, and weight are incompatible, or
+    if `weight` is None.
 
 
 - - -
diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md
index 2580504e467..8da020965f2 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md
@@ -1194,30 +1194,6 @@ and update ops when the list of metrics is long. For example:
   names to update ops.
 
 
-- - -
-
-### `tf.contrib.metrics.run_metric(metric, predictions, targets, weights=None)` {#run_metric}
-
-Runs a single metric.
-
-This function runs metric on given predictions and targets. weights will be
-used if metric contains 'weights' in its argument.
-
-##### Args:
-
-
-*  <b>`metric`</b>: A function that evaluates targets given predictions.
-*  <b>`predictions`</b>: A `Tensor` of arbitrary shape.
-*  <b>`targets`</b>: A `Tensor` of the same shape as `predictions`.
-*  <b>`weights`</b>: A set of weights that can be used in metric function to compute
-    weighted result.
-
-##### Returns:
-
-
-*  <b>`result`</b>: result returned by metric function.
-
-
 
 ## Set `Ops`
 
diff --git a/tensorflow/g3doc/api_docs/python/contrib.training.md b/tensorflow/g3doc/api_docs/python/contrib.training.md
index ebecf7dfca8..ebeef623e9e 100644
--- a/tensorflow/g3doc/api_docs/python/contrib.training.md
+++ b/tensorflow/g3doc/api_docs/python/contrib.training.md
@@ -853,3 +853,155 @@ known ahead of time.
   ...
 
 
+
+## Bucketing
+
+Use ['bucket'](#bucket) or
+['bucket_by_sequence_length'](#bucket_by_sequence_length) to stratify
+minibatches into groups ("buckets").  Use `bucket_by_sequence_length`
+with the argument `dynamic_pad=True` to receive minibatches of similarly
+sized sequences for efficient training via `dynamic_rnn`.
+
+- - -
+
+### `tf.contrib.training.bucket(tensors, which_bucket, batch_size, num_buckets, num_threads=1, capacity=32, shapes=None, dynamic_pad=False, allow_smaller_final_batch=False, keep_input=None, shared_name=None, name=None)` {#bucket}
+
+Lazy bucketing of input tensors according to `which_bucket`.
+
+The argument `tensors` can be a list or a dictionary of tensors.
+The value returned by the function will be of the same type
+as `tensors`.
+
+The tensors entering this function are put into the bucket given by
+`which_bucket`.  Each bucket has its own queue.  When a bucket contains
+`batch_size` elements, this minibatch is pushed onto a top queue.  The
+tensors returned from this function are the result of dequeueing the
+next minibatch from this top queue.
+
+This function is implemented using several queues. A `QueueRunner` for the
+queues is added to the current `Graph`'s `QUEUE_RUNNER` collection.
+
+As the returned tensors are the result of a dequeue operation, evaluating
+them will throw a `tf.errors.OutOfRangeError` when the input queue is
+exhausted.  If these tensors are feeding another input queue, its queue runner
+will catch this exception; however, if they are used in your main thread
+you are responsible for catching this yourself.
+
+*N.B.:* If `dynamic_pad` is `False`, you must ensure that either
+(i) the `shapes` argument is passed, or (ii) all of the tensors in
+`tensors` have fully-defined shapes. `ValueError` will be
+raised if neither of these conditions holds.
+
+If `dynamic_pad` is `True`, it is sufficient that the *rank* of the
+tensors is known, but individual dimensions may have shape `None`.
+In this case, for each enqueue the dimensions with value `None`
+may have a variable length; upon dequeue, the output tensors will be padded
+on the right to the maximum shape of the tensors in the current minibatch.
+For numbers, this padding takes value 0.  For strings, this padding is
+the empty string.  See `PaddingFIFOQueue` for more info.
+
+If `allow_smaller_final_batch` is `True`, a smaller batch value than
+`batch_size` is returned when the queues are closed and there are not enough
+elements to fill the batch, otherwise the pending elements are discarded.
+In addition, all output tensors' static shapes, as accessed via the
+`get_shape()` method will have a 0th `Dimension` value of `None`, and
+operations that depend on fixed batch_size would fail.
+
+##### Args:
+
+
+*  <b>`tensors`</b>: The list or dictionary of tensors, representing a single element,
+    to bucket.  Nested lists are not supported.
+*  <b>`which_bucket`</b>: An `int32` scalar Tensor taking a value in `[0, num_buckets)`.
+*  <b>`batch_size`</b>: The new batch size pulled from the queue
+    (python int or int32 scalar).
+*  <b>`num_buckets`</b>: A python integer, the number of buckets.
+*  <b>`num_threads`</b>: An integer.  The number of threads enqueuing `tensors`.
+*  <b>`capacity`</b>: An integer. The maximum number of minibatches in the top queue,
+    and also the maximum number of elements within each bucket.
+*  <b>`shapes`</b>: (Optional) The shapes for each example.  Defaults to the
+    inferred shapes for `tensors`.
+*  <b>`dynamic_pad`</b>: Boolean.  Allow variable dimensions in input shapes.
+    The given dimensions are padded upon dequeue so that tensors within a
+    batch have the same shapes.
+*  <b>`allow_smaller_final_batch`</b>: (Optional) Boolean. If `True`, allow the final
+    batches to be smaller if there are insufficient items left in the queues.
+*  <b>`keep_input`</b>: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
+    controls whether the input is added to the queue or not.  If it evaluates
+    `True`, then `tensors` are added to the bucket; otherwise they are
+    dropped.  This tensor essentially acts as a filtering mechanism.
+    The default behavior is to assume `keep_input=True`.
+*  <b>`shared_name`</b>: (Optional). If set, the queues will be shared under the given
+    name across multiple sessions.
+*  <b>`name`</b>: (Optional) A name for the operations.
+
+##### Returns:
+
+  A tuple `(bucket, outputs)` where `bucket` is
+  an `int32` scalar tensor and `outputs` is a list or
+  dictionary of batched outputs corresponding to elements of `tensors`.
+  Every step will receive a new bucket of outputs.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If the `shapes` are not specified, and cannot be
+    inferred from the elements of `tensors`.
+
+
+- - -
+
+### `tf.contrib.training.bucket_by_sequence_length(input_length, tensors, batch_size, bucket_boundaries, num_threads=1, capacity=32, shapes=None, dynamic_pad=False, allow_smaller_final_batch=False, keep_input=None, shared_name=None, name=None)` {#bucket_by_sequence_length}
+
+Lazy bucketing of inputs according to their length.
+
+This method calls `tf.contrib.training.bucket` under the hood, after first
+subdividing the bucket boundaries into separate buckets and identifying which
+bucket the given `input_length` belongs to.  See the documentation for
+`tf.contrib.training.bucket` for details of the other arguments.
+
+##### Args:
+
+
+*  <b>`input_length`</b>: `int32` scalar `Tensor`, the sequence length of tensors.
+*  <b>`tensors`</b>: The list or dictionary of tensors, representing a single element,
+    to bucket.  Nested lists are not supported.
+*  <b>`batch_size`</b>: The new batch size pulled from the queue
+    (python int or int32 scalar).
+*  <b>`bucket_boundaries`</b>: int list, increasing non-negative numbers.
+    The edges of the buckets to use when bucketing tensors.  Two extra buckets
+    are created, one for `input_length < bucket_boundaries[0]` and
+    one for `input_length >= bucket_boundaries[-1]`.
+*  <b>`num_threads`</b>: An integer.  The number of threads enqueuing `tensors`.
+*  <b>`capacity`</b>: An integer. The maximum number of minibatches in the top queue,
+    and also the maximum number of elements within each bucket.
+*  <b>`shapes`</b>: (Optional) The shapes for each example.  Defaults to the
+    inferred shapes for `tensors`.
+*  <b>`dynamic_pad`</b>: Boolean.  Allow variable dimensions in input shapes.
+    The given dimensions are padded upon dequeue so that tensors within a
+    batch have the same shapes.
+*  <b>`allow_smaller_final_batch`</b>: (Optional) Boolean. If `True`, allow the final
+    batches to be smaller if there are insufficient items left in the queues.
+*  <b>`keep_input`</b>: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
+    controls whether the input is added to the queue or not.  If it evaluates
+    `True`, then `tensors` are added to the bucket; otherwise they are
+    dropped.  This tensor essentially acts as a filtering mechanism.
+    The default behavior is to assume `keep_input=True`.
+*  <b>`shared_name`</b>: (Optional). If set, the queues will be shared under the given
+    name across multiple sessions.
+*  <b>`name`</b>: (Optional) A name for the operations.
+
+##### Returns:
+
+  A tuple `(sequence_length, outputs)` where `sequence_length` is
+  a 1-D `Tensor` of size `batch_size` and `outputs` is a list or dictionary
+  of batched, bucketed, outputs corresponding to elements of `tensors`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `bucket_boundaries` is not a list of python integers.
+*  <b>`ValueError`</b>: if `bucket_boundaries` is empty or contains non-increasing
+    values.
+
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md
index 235aac6beba..ab6370371f4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md
@@ -80,6 +80,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -229,6 +235,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -309,6 +325,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Bernoulli.log_survival_function(value, name='log_survival_function')` {#Bernoulli.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Bernoulli.logits` {#Bernoulli.logits}
@@ -521,6 +566,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Bernoulli.survival_function(value, name='survival_function')` {#Bernoulli.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Bernoulli.validate_args` {#Bernoulli.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md
index 0da51e79e2f..09b49097603 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md
@@ -154,6 +154,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -303,6 +309,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -383,6 +399,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Dirichlet.log_survival_function(value, name='log_survival_function')` {#Dirichlet.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Dirichlet.mean(name='mean')` {#Dirichlet.mean}
@@ -574,6 +619,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Dirichlet.survival_function(value, name='survival_function')` {#Dirichlet.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Dirichlet.validate_args` {#Dirichlet.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md
index 80ae0062e62..cbfdefffd56 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md
@@ -177,6 +177,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -326,6 +332,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -406,6 +422,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Distribution.log_survival_function(value, name='log_survival_function')` {#Distribution.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Distribution.mean(name='mean')` {#Distribution.mean}
@@ -597,6 +642,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Distribution.survival_function(value, name='survival_function')` {#Distribution.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Distribution.validate_args` {#Distribution.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md
index 085d7a660ef..27a938ed3f8 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md
@@ -123,6 +123,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -272,6 +278,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -359,6 +375,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalCholesky.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalCholesky.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalCholesky.mean(name='mean')` {#MultivariateNormalCholesky.mean}
@@ -571,6 +616,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalCholesky.survival_function(value, name='survival_function')` {#MultivariateNormalCholesky.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalCholesky.validate_args` {#MultivariateNormalCholesky.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md
index dca8a8a2a53..144c0dd2b23 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md
@@ -40,8 +40,6 @@ Input of `fit` and `evaluate` should have following features,
        key=weight column name, value=a `SparseTensor`}
   - if isinstance(column, `RealValuedColumn`):
       key=column.name, value=a `Tensor`
-  - if `feature_columns` is `None`:
-      input must contains only real valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.LinearRegressor.__init__(feature_columns, model_dir=None, weight_column_name=None, optimizer=None, gradient_clip_norm=None, enable_centered_bias=True, target_dimension=1, config=None)` {#LinearRegressor.__init__}
@@ -126,7 +124,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md
index bc8fb0e8288..6aca7ccd34c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md
@@ -122,6 +122,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -271,6 +277,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -358,6 +374,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiag.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalDiag.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiag.mean(name='mean')` {#MultivariateNormalDiag.mean}
@@ -570,6 +615,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiag.survival_function(value, name='survival_function')` {#MultivariateNormalDiag.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiag.validate_args` {#MultivariateNormalDiag.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md
index 3109e3e4ace..146a3a6d130 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md
@@ -125,6 +125,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -281,6 +287,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -361,6 +377,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentT.log_survival_function(value, name='log_survival_function')` {#StudentT.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentT.mean(name='mean')` {#StudentT.mean}
@@ -566,6 +611,32 @@ Scaling factors of these Student's t distribution(s).
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.StudentT.survival_function(value, name='survival_function')` {#StudentT.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.StudentT.validate_args` {#StudentT.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
index 6cc092faade..e6580c3c80c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md
@@ -114,6 +114,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -270,6 +276,16 @@ Inverse function of transform, y => x.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -357,6 +373,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.TransformedDistribution.log_survival_function(value, name='log_survival_function')` {#TransformedDistribution.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.TransformedDistribution.mean(name='mean')` {#TransformedDistribution.mean}
@@ -548,6 +593,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.TransformedDistribution.survival_function(value, name='survival_function')` {#TransformedDistribution.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.TransformedDistribution.transform` {#TransformedDistribution.transform}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md
index c0590b05a6f..86f06ff940f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md
@@ -57,8 +57,6 @@ Input of `fit` and `evaluate` should have following features,
     Both features' `value` must be a `SparseTensor`.
   - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
     whose `value` is a `Tensor`.
-  - if `feature_columns` is `None`, then `input` must contains only real
-    valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.LinearClassifier.__init__(feature_columns, model_dir=None, n_classes=2, weight_column_name=None, optimizer=None, gradient_clip_norm=None, enable_centered_bias=True, config=None)` {#LinearClassifier.__init__}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md
index bd3b960af86..12340efd1f8 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md
@@ -71,6 +71,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -220,6 +226,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -300,6 +316,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Categorical.log_survival_function(value, name='log_survival_function')` {#Categorical.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Categorical.logits` {#Categorical.logits}
@@ -505,6 +550,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Categorical.survival_function(value, name='survival_function')` {#Categorical.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Categorical.validate_args` {#Categorical.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md
index 9734c0420fe..a8f0b8776b7 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md
@@ -88,6 +88,12 @@ Inverse scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -244,6 +250,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -324,6 +340,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2.log_survival_function(value, name='log_survival_function')` {#Chi2.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2.mean(name='mean')` {#Chi2.mean}
@@ -515,6 +560,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Chi2.survival_function(value, name='survival_function')` {#Chi2.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Chi2.validate_args` {#Chi2.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md
index c40c7637d62..5cbf36c8fec 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md
@@ -108,6 +108,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -257,6 +263,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -337,6 +353,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Uniform.log_survival_function(value, name='log_survival_function')` {#Uniform.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Uniform.mean(name='mean')` {#Uniform.mean}
@@ -535,6 +580,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Uniform.survival_function(value, name='survival_function')` {#Uniform.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Uniform.validate_args` {#Uniform.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md
index fd5a7b705fe..10efb1a18b7 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md
@@ -132,6 +132,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -302,6 +308,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -389,6 +405,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartCholesky.log_survival_function(value, name='log_survival_function')` {#WishartCholesky.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartCholesky.mean(name='mean')` {#WishartCholesky.mean}
@@ -601,6 +646,32 @@ Wishart distribution scale matrix as an OperatorPD.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartCholesky.survival_function(value, name='survival_function')` {#WishartCholesky.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartCholesky.validate_args` {#WishartCholesky.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.BaseEstimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.BaseEstimator.md
index 7d8417e464d..c9c22e9138d 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.BaseEstimator.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.BaseEstimator.md
@@ -51,7 +51,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.TensorFlowDNNRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.TensorFlowDNNRegressor.md
index f0e71d4e573..20046f02451 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.TensorFlowDNNRegressor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.learn.TensorFlowDNNRegressor.md
@@ -56,7 +56,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.real.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.real.md
index 993fddd96ee..cb1cc327630 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.real.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.real.md
@@ -15,11 +15,12 @@ For example:
 tf.real(input) ==> [-2.25, 3.25]
 ```
 
+If `input` is already real, it is returned unchanged.
+
 ##### Args:
 
 
-*  <b>`input`</b>: A `Tensor`. Must be one of the following types: `complex64`,
-       `complex128`.
+*  <b>`input`</b>: A `Tensor`. Must have numeric type.
 *  <b>`name`</b>: A name for the operation (optional).
 
 ##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.md
new file mode 100644
index 00000000000..3280f5a9448
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.md
@@ -0,0 +1,85 @@
+`MixtureTensor` is a `StochasticTensor` backed by the distribution `Mixture`.
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.__init__(name=None, dist_value_type=None, loss_fn=score_function, **dist_args)` {#MixtureTensor.__init__}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.clone(name=None, **dist_args)` {#MixtureTensor.clone}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.distribution` {#MixtureTensor.distribution}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.dtype` {#MixtureTensor.dtype}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.entropy(name='entropy')` {#MixtureTensor.entropy}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.graph` {#MixtureTensor.graph}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.input_dict` {#MixtureTensor.input_dict}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.loss(final_loss, name='Loss')` {#MixtureTensor.loss}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.mean(name='mean')` {#MixtureTensor.mean}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.name` {#MixtureTensor.name}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.value(name='value')` {#MixtureTensor.value}
+
+
+
+
+- - -
+
+#### `tf.contrib.bayesflow.stochastic_tensor.MixtureTensor.value_type` {#MixtureTensor.value_type}
+
+
+
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md
index 1c7d8899462..cd8ca09005b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md
@@ -137,6 +137,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -286,6 +292,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -366,6 +382,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Binomial.log_survival_function(value, name='log_survival_function')` {#Binomial.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Binomial.logits` {#Binomial.logits}
@@ -578,6 +623,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Binomial.survival_function(value, name='survival_function')` {#Binomial.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Binomial.validate_args` {#Binomial.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md
index 1a9d3c28b15..2c81cb1782c 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md
@@ -166,6 +166,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -315,6 +321,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -395,6 +411,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.DirichletMultinomial.log_survival_function(value, name='log_survival_function')` {#DirichletMultinomial.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.DirichletMultinomial.mean(name='mean')` {#DirichletMultinomial.mean}
@@ -593,6 +638,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.DirichletMultinomial.survival_function(value, name='survival_function')` {#DirichletMultinomial.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.DirichletMultinomial.validate_args` {#DirichletMultinomial.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md
index 2c4cb44aaf4..751b21a2d11 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md
@@ -88,6 +88,12 @@ Inverse scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -244,6 +250,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -324,6 +340,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Exponential.log_survival_function(value, name='log_survival_function')` {#Exponential.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Exponential.mean(name='mean')` {#Exponential.mean}
@@ -515,6 +560,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Exponential.survival_function(value, name='survival_function')` {#Exponential.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Exponential.validate_args` {#Exponential.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md
index 350a67a8390..9013f206691 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md
@@ -115,6 +115,12 @@ Inverse scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -264,6 +270,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -344,6 +360,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Gamma.log_survival_function(value, name='log_survival_function')` {#Gamma.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `Log[1 - cdf(x)]` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Gamma.mean(name='mean')` {#Gamma.mean}
@@ -535,6 +580,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Gamma.survival_function(value, name='survival_function')` {#Gamma.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Gamma.validate_args` {#Gamma.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md
index 45c2201d613..cb473118ed8 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md
@@ -111,6 +111,12 @@ Scale parameter.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -260,6 +266,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -340,6 +356,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGamma.log_survival_function(value, name='log_survival_function')` {#InverseGamma.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `Log[1 - cdf(x)]` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGamma.mean(name='mean')` {#InverseGamma.mean}
@@ -531,6 +576,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.InverseGamma.survival_function(value, name='survival_function')` {#InverseGamma.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.InverseGamma.validate_args` {#InverseGamma.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md
index 59a3dfa4ded..e4e6935e3d9 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md
@@ -148,6 +148,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -297,6 +303,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -377,6 +393,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Multinomial.log_survival_function(value, name='log_survival_function')` {#Multinomial.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `Log[1 - cdf(x)]` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Multinomial.logits` {#Multinomial.logits}
@@ -589,6 +634,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Multinomial.survival_function(value, name='survival_function')` {#Multinomial.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Multinomial.validate_args` {#Multinomial.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md
index 977fd677395..16130660ae7 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md
@@ -149,6 +149,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -298,6 +304,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -385,6 +401,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalDiagPlusVDVT.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `Log[1 - cdf(x)]` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.mean(name='mean')` {#MultivariateNormalDiagPlusVDVT.mean}
@@ -597,6 +642,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.survival_function(value, name='survival_function')` {#MultivariateNormalDiagPlusVDVT.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.validate_args` {#MultivariateNormalDiagPlusVDVT.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md
index 7ebac13feff..11534d34965 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md
@@ -1,7 +1,7 @@
 Estimator class is the basic TensorFlow model trainer/evaluator.
 - - -
 
-#### `tf.contrib.learn.Estimator.__init__(model_fn=None, model_dir=None, config=None, params=None, weight_column_name=None)` {#Estimator.__init__}
+#### `tf.contrib.learn.Estimator.__init__(model_fn=None, model_dir=None, config=None, params=None)` {#Estimator.__init__}
 
 Constructs an Estimator instance.
 
@@ -16,7 +16,7 @@ Constructs an Estimator instance.
       * `(features, targets, mode) -> (predictions, loss, train_op)`
       * `(features, targets, mode, params) -> (predictions, loss, train_op)`
 
-  Where
+    Where
 
       * `features` are single `Tensor` or `dict` of `Tensor`s
              (depending on data passed to `fit`),
@@ -38,9 +38,6 @@ Constructs an Estimator instance.
 *  <b>`config`</b>: Configuration object.
 *  <b>`params`</b>: `dict` of hyper parameters that will be passed into `model_fn`.
           Keys are names of parameters, values are basic python types.
-*  <b>`weight_column_name`</b>: A string defining feature column name representing
-    weights. It is used to down weight or boost examples during training. It
-    will be multiplied by the loss of the example.
 
 ##### Raises:
 
@@ -77,7 +74,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.bucket.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.bucket.md
new file mode 100644
index 00000000000..8ddb64eac22
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.bucket.md
@@ -0,0 +1,84 @@
+### `tf.contrib.training.bucket(tensors, which_bucket, batch_size, num_buckets, num_threads=1, capacity=32, shapes=None, dynamic_pad=False, allow_smaller_final_batch=False, keep_input=None, shared_name=None, name=None)` {#bucket}
+
+Lazy bucketing of input tensors according to `which_bucket`.
+
+The argument `tensors` can be a list or a dictionary of tensors.
+The value returned by the function will be of the same type
+as `tensors`.
+
+The tensors entering this function are put into the bucket given by
+`which_bucket`.  Each bucket has its own queue.  When a bucket contains
+`batch_size` elements, this minibatch is pushed onto a top queue.  The
+tensors returned from this function are the result of dequeueing the
+next minibatch from this top queue.
+
+This function is implemented using several queues. A `QueueRunner` for the
+queues is added to the current `Graph`'s `QUEUE_RUNNER` collection.
+
+As the returned tensors are the result of a dequeue operation, evaluating
+them will throw a `tf.errors.OutOfRangeError` when the input queue is
+exhausted.  If these tensors are feeding another input queue, its queue runner
+will catch this exception; however, if they are used in your main thread
+you are responsible for catching this yourself.
+
+*N.B.:* If `dynamic_pad` is `False`, you must ensure that either
+(i) the `shapes` argument is passed, or (ii) all of the tensors in
+`tensors` must have fully-defined shapes. `ValueError` will be
+raised if neither of these conditions holds.
+
+If `dynamic_pad` is `True`, it is sufficient that the *rank* of the
+tensors is known, but individual dimensions may have shape `None`.
+In this case, for each enqueue the dimensions with value `None`
+may have a variable length; upon dequeue, the output tensors will be padded
+on the right to the maximum shape of the tensors in the current minibatch.
+For numbers, this padding takes value 0.  For strings, this padding is
+the empty string.  See `PaddingFIFOQueue` for more info.
+
+If `allow_smaller_final_batch` is `True`, a smaller batch value than
+`batch_size` is returned when the queues are closed and there are not enough
+elements to fill the batch, otherwise the pending elements are discarded.
+In addition, all output tensors' static shapes, as accessed via the
+`get_shape()` method will have a 0th `Dimension` value of `None`, and
+operations that depend on fixed batch_size would fail.
+
+##### Args:
+
+
+*  <b>`tensors`</b>: The list or dictionary of tensors, representing a single element,
+    to bucket.  Nested lists are not supported.
+*  <b>`which_bucket`</b>: An `int32` scalar Tensor taking a value in `[0, num_buckets)`.
+*  <b>`batch_size`</b>: The new batch size pulled from the queue
+    (python int or int32 scalar).
+*  <b>`num_buckets`</b>: A python integer, the number of buckets.
+*  <b>`num_threads`</b>: An integer.  The number of threads enqueuing `tensors`.
+*  <b>`capacity`</b>: An integer. The maximum number of minibatches in the top queue,
+    and also the maximum number of elements within each bucket.
+*  <b>`shapes`</b>: (Optional) The shapes for each example.  Defaults to the
+    inferred shapes for `tensors`.
+*  <b>`dynamic_pad`</b>: Boolean.  Allow variable dimensions in input shapes.
+    The given dimensions are padded upon dequeue so that tensors within a
+    batch have the same shapes.
+*  <b>`allow_smaller_final_batch`</b>: (Optional) Boolean. If `True`, allow the final
+    batches to be smaller if there are insufficient items left in the queues.
+*  <b>`keep_input`</b>: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
+    controls whether the input is added to the queue or not.  If it evaluates
+    to `True`, then `tensors` are added to the bucket; otherwise they are
+    dropped.  This tensor essentially acts as a filtering mechanism.
+    The default behavior is to assume `keep_input=True`.
+*  <b>`shared_name`</b>: (Optional). If set, the queues will be shared under the given
+    name across multiple sessions.
+*  <b>`name`</b>: (Optional) A name for the operations.
+
+##### Returns:
+
+  A tuple `(bucket, outputs)` where `bucket` is
+  an `int32` scalar tensor and `outputs` is a list or
+  dictionary of batched outputs corresponding to elements of `tensors`.
+  Every step will receive a new bucket of outputs.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If the `shapes` are not specified, and cannot be
+    inferred from the elements of `tensors`.
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md
index 51d40acbc0b..cc7b5ca8175 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md
@@ -51,8 +51,6 @@ Input of `fit` and `evaluate` should have following features,
     Both features' `value` must be a `SparseTensor`.
   - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
     whose `value` is a `Tensor`.
-  - if `feature_columns` is `None`, then `input` must contain only real
-    valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.DNNClassifier.__init__(hidden_units, feature_columns, model_dir=None, n_classes=2, weight_column_name=None, optimizer=None, activation_fn=relu, dropout=None, gradient_clip_norm=None, enable_centered_bias=True, config=None)` {#DNNClassifier.__init__}
@@ -145,7 +143,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowEstimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowEstimator.md
index 1ec999d2042..ebafb898111 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowEstimator.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowEstimator.md
@@ -84,7 +84,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowRNNRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowRNNRegressor.md
index 785ef95dc05..2568e2024ba 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowRNNRegressor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.TensorFlowRNNRegressor.md
@@ -99,7 +99,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.learn.TensorFlowDNNClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.learn.TensorFlowDNNClassifier.md
index 8be9a839f24..3f46aefb69a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.learn.TensorFlowDNNClassifier.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.learn.TensorFlowDNNClassifier.md
@@ -56,7 +56,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.range.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.range.md
index c33825d3be2..fcd865e5fc1 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.range.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.range.md
@@ -23,13 +23,15 @@ tf.range(limit) ==> [0, 1, 2, 3, 4]
 ##### Args:
 
 
-*  <b>`start`</b>: A 0-D (scalar) of type `int32`. First entry in sequence.
-    Defaults to 0.
+*  <b>`start`</b>: A 0-D (scalar) of type `int32`. Acts as first entry in the range if
+    `limit` is not None; otherwise, acts as range limit and first entry
+    defaults to 0.
 *  <b>`limit`</b>: A 0-D (scalar) of type `int32`. Upper limit of sequence,
-    exclusive.
-*  <b>`delta`</b>: A 0-D `Tensor` (scalar) of type `int32`. Optional. Default is 1.
-    Number that increments `start`.
-*  <b>`name`</b>: A name for the operation (optional).
+    exclusive. If None, defaults to the value of `start` while the first
+    entry of the range defaults to 0.
+*  <b>`delta`</b>: A 0-D `Tensor` (scalar) of type `int32`. Number that increments
+    `start`. Defaults to 1.
+*  <b>`name`</b>: A name for the operation. Defaults to "range".
 
 ##### Returns:
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.conj.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.conj.md
index 6df004b0cdc..e7491301cbc 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.conj.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.conj.md
@@ -1,4 +1,4 @@
-### `tf.conj(input, name=None)` {#conj}
+### `tf.conj(x, name=None)` {#conj}
 
 Returns the complex conjugate of a complex number.
 
@@ -11,18 +11,23 @@ The complex conjugate returned by this operation is of the form \\(a - bj\\).
 
 For example:
 
-```
-# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
-```
+    # tensor 'x' is [-2.25 + 4.75j, 3.25 + 5.75j]
+    tf.conj(x) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+
+If `x` is real, it is returned unchanged.
 
 ##### Args:
 
 
-*  <b>`input`</b>: A `Tensor`. Must be one of the following types: `complex64`, `complex128`.
+*  <b>`x`</b>: `Tensor` to conjugate.  Must have numeric type.
 *  <b>`name`</b>: A name for the operation (optional).
 
 ##### Returns:
 
-  A `Tensor`. Has the same type as `input`.
+  A `Tensor` that is the conjugate of `x` (with the same type).
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: If `x` is not a numeric tensor.
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md
index f7a6f0c8a86..0978e391da3 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md
@@ -163,6 +163,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -312,6 +318,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -392,6 +408,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Beta.log_survival_function(value, name='log_survival_function')` {#Beta.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `Log[1 - cdf(x)]` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Beta.mean(name='mean')` {#Beta.mean}
@@ -583,6 +628,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Beta.survival_function(value, name='survival_function')` {#Beta.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Beta.validate_args` {#Beta.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md
index fd57c8369d6..2943896a06a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md
@@ -86,6 +86,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -242,6 +248,16 @@ Distribution parameter for the location.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -322,6 +338,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Laplace.log_survival_function(value, name='log_survival_function')` {#Laplace.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `Log[1 - cdf(x)]` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Laplace.mean(name='mean')` {#Laplace.mean}
@@ -520,6 +565,32 @@ Distribution parameter for scale.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Laplace.survival_function(value, name='survival_function')` {#Laplace.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Laplace.validate_args` {#Laplace.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md
index 714c457a249..79ac74473db 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md
@@ -114,6 +114,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -263,6 +269,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -350,6 +366,35 @@ Log probability density/mass function (depending on `is_continuous`).
 Log of determinant of covariance matrix.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalFull.log_survival_function(value, name='log_survival_function')` {#MultivariateNormalFull.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalFull.mean(name='mean')` {#MultivariateNormalFull.mean}
@@ -562,6 +607,32 @@ Determinant of covariance matrix.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.MultivariateNormalFull.survival_function(value, name='survival_function')` {#MultivariateNormalFull.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.MultivariateNormalFull.validate_args` {#MultivariateNormalFull.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md
index af4a07e521e..8093ec6be4f 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md
@@ -116,6 +116,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -265,6 +271,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -345,6 +361,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Normal.log_survival_function(value, name='log_survival_function')` {#Normal.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Normal.mean(name='mean')` {#Normal.mean}
@@ -550,6 +595,32 @@ Distribution parameter for standard deviation.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Normal.survival_function(value, name='survival_function')` {#Normal.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Normal.validate_args` {#Normal.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.losses.softmax_cross_entropy.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.losses.softmax_cross_entropy.md
index 6d465f48f48..51d434e8e34 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.losses.softmax_cross_entropy.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.losses.softmax_cross_entropy.md
@@ -28,6 +28,6 @@ If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes:
 ##### Raises:
 
 
-*  <b>`ValueError`</b>: If the shape of `predictions` doesn't match that of `targets` or
-    if the shape of `weight` is invalid or if `weight` is None.
+*  <b>`ValueError`</b>: If the shape of `logits` doesn't match that of `onehot_labels`
+    or if the shape of `weight` is invalid or if `weight` is None.
 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md
new file mode 100644
index 00000000000..154905677b3
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md
@@ -0,0 +1,705 @@
+Mixture distribution.
+
+The `Mixture` object implements batched mixture distributions.
+The mixture model is defined by a `Categorical` distribution (the mixture)
+and a python list of `Distribution` objects.
+
+Methods supported include `log_prob`, `prob`, `mean`, `sample`, and
+`entropy_lower_bound`.
+- - -
+
+#### `tf.contrib.distributions.Mixture.__init__(cat, components, validate_args=True, allow_nan_stats=False, name='Mixture')` {#Mixture.__init__}
+
+Initialize a Mixture distribution.
+
+A `Mixture` is defined by a `Categorical` (`cat`, representing the
+mixture probabilities) and a list of `Distribution` objects
+all having matching dtype, batch shape, event shape, and continuity
+properties (the components).
+
+The user does not pass the list of distributions directly, but rather a
+list of `(constructor, batch_tensor_params_dict)` pairs,
+called `components`. The list of distributions is created via:
+
+```python
+distributions = [
+  c(**params_dict) for (c, params_dict) in components
+]
+```
+
+This form allows for certain types of batch-shape optimizations within
+this class.
+
+An example of `components`:
+
+```python
+components = [
+  (tf.contrib.distributions.Normal, {"mu": 3.0, "sigma": 1.0}),
+  (functools.partial(tf.contrib.distributions.Normal, validate_args=False),
+   {"mu": 3.0, "sigma": 2.0}),
+  (tf.contrib.distributions.Normal.from_params,
+   {"mu": 1.0, "sigma": -1.0})
+]
+```
+
+The `num_classes` of `cat` must be possible to infer at graph construction
+time and match `len(distributions)`.
+
+##### Args:
+
+
+*  <b>`cat`</b>: A `Categorical` distribution instance, representing the probabilities
+      of `distributions`.
+*  <b>`components`</b>: A list or tuple of `(constructor, batch_tensor_params)`
+    tuples.  The `constructor` must be a callable, and `batch_tensor_params`
+    must be a dict mapping constructor kwargs to batchwise parameters.
+    Each `Distribution` instance created by calling
+    `constructor(**batch_tensor_params)` must have the same type, be defined
+    on the same domain, and have matching `event_shape` and `batch_shape`.
+*  <b>`validate_args`</b>: Boolean, default `True`.  If `True`, raise a runtime error
+    if batch or event ranks are inconsistent between cat and any of the
+    distributions.  This is only checked if the ranks cannot be determined
+    statically at graph construction time.
+*  <b>`allow_nan_stats`</b>: Boolean, default `False`.  If `False`, raise an
+   exception if a statistic (e.g. mean/mode/etc...) is undefined for any
+    batch member.  If `True`, batch members with valid parameters leading to
+    undefined statistics will return NaN for this statistic.
+*  <b>`name`</b>: A name for this distribution (optional).
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: If cat is not a `Categorical`, or `components` is not
+    a list or tuple, or the elements of `components` are not
+    tuples of the form `(callable, dict)`, or the objects resulting
+    from calling `callable(**dict)` are not instances of `Distribution`, or
+    the resulting instances of `Distribution` do not have matching
+    continuity properties, or do not have matching `dtype`.
+*  <b>`ValueError`</b>: If `components` is an empty list or tuple, or the
+    distributions created from `components` do not have a statically known event
+    rank.  If `cat.num_classes` cannot be inferred at graph creation time,
+    or the constant value of `cat.num_classes` is not equal to
+    `len(distributions)`, or all `distributions` and `cat` do not have
+    matching static batch shapes, or all components' distributions do not
+    have matching static event shapes.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.allow_nan_stats` {#Mixture.allow_nan_stats}
+
+Python boolean describing behavior when a stat is undefined.
+
+Stats return +/- infinity when it makes sense.  E.g., the variance
+of a Cauchy distribution is infinity.  However, sometimes the
+statistic is undefined, e.g., if a distribution's pdf does not achieve a
+maximum within the support of the distribution, the mode is undefined.
+If the mean is undefined, then by definition the variance is undefined.
+E.g. the mean for Student's T for df = 1 is undefined (no clear way to say
+it is either + or - infinity), so the variance = E[(X - mean)^2] is also
+undefined.
+
+##### Returns:
+
+
+*  <b>`allow_nan_stats`</b>: Python boolean.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.batch_shape(name='batch_shape')` {#Mixture.batch_shape}
+
+Shape of a single sample from a single event index as a 1-D `Tensor`.
+
+The product of the dimensions of the `batch_shape` is the number of
+independent distributions of this kind the instance represents.
+
+##### Args:
+
+
+*  <b>`name`</b>: name to give to the op
+
+##### Returns:
+
+
+*  <b>`batch_shape`</b>: `Tensor`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.cat` {#Mixture.cat}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.cdf(value, name='cdf')` {#Mixture.cdf}
+
+Cumulative distribution function.
+
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`cdf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.distributions` {#Mixture.distributions}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.dtype` {#Mixture.dtype}
+
+The `DType` of `Tensor`s handled by this `Distribution`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.entropy(name='entropy')` {#Mixture.entropy}
+
+Shannon entropy in nats.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.entropy_lower_bound(name='entropy_lower_bound')` {#Mixture.entropy_lower_bound}
+
+A lower bound on the entropy of this mixture model.
+
+The bound below is not always very tight, and its usefulness depends
+on the mixture probabilities and the distributions in use.
+
+A lower bound is useful for ELBO when the `Mixture` is the variational
+distribution:
+
+\\(
+\log p(x) >= ELBO = \int q(z) \log p(x, z) dz + H[q]
+\\)
+
+where \\( p \\) is the prior distribution, \\( q \\) is the variational,
+and \\( H[q] \\) is the entropy of \\( q \\).  If there is a lower bound
+\\( G[q] \\) such that \\( H[q] \geq G[q] \\) then it can be used in
+place of \\( H[q] \\).
+
+For a mixture of distributions \\( q(Z) = \sum_i c_i q_i(Z) \\) with
+\\( \sum_i c_i = 1 \\), by the concavity of \\( f(x) = -x \log x \\), a
+simple lower bound is:
+
+\\(
+\begin{align}
+H[q] & = - \int q(z) \log q(z) dz \\\
+   & = - \int (\sum_i c_i q_i(z)) \log(\sum_i c_i q_i(z)) dz \\\
+   & \geq - \sum_i c_i \int q_i(z) \log q_i(z) dz \\\
+   & = \sum_i c_i H[q_i]
+\end{align}
+\\)
+
+This is the term we calculate below for \\( G[q] \\).
+
+##### Args:
+
+
+*  <b>`name`</b>: A name for this operation (optional).
+
+##### Returns:
+
+  A lower bound on the Mixture's entropy.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.event_shape(name='event_shape')` {#Mixture.event_shape}
+
+Shape of a single sample from a single batch as a 1-D int32 `Tensor`.
+
+##### Args:
+
+
+*  <b>`name`</b>: name to give to the op
+
+##### Returns:
+
+
+*  <b>`event_shape`</b>: `Tensor`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.from_params(cls, make_safe=True, **kwargs)` {#Mixture.from_params}
+
+Given (unconstrained) parameters, return an instantiated distribution.
+
+Subclasses should implement a static method `_safe_transforms` that returns
+a dict of parameter transforms, which will be used if `make_safe = True`.
+
+Example usage:
+
+```
+# Let's say we want a sample of size (batch_size, 10)
+shapes = MultivariateNormalDiag.param_shapes([batch_size, 10])
+
+# shapes has a Tensor shape for mu and sigma
+# shapes == {
+#   "mu": tf.constant([batch_size, 10]),
+#   "sigma": tf.constant([batch_size, 10]),
+# }
+
+# Here we parameterize mu and sigma with the output of a linear
+# layer. Note that sigma is unconstrained.
+params = {}
+for name, shape in shapes.items():
+  params[name] = linear(x, shape[1])
+
+# Note that you can forward other kwargs to the `Distribution`, like
+# `allow_nan_stats` or `name`.
+mvn = MultivariateNormalDiag.from_params(**params, allow_nan_stats=True)
+```
+
+Distribution parameters may have constraints (e.g. `sigma` must be positive
+for a `Normal` distribution) and the `from_params` method will apply default
+parameter transforms. If a user wants to use their own transform, they can
+apply it externally and set `make_safe=False`.
+
+##### Args:
+
+
+*  <b>`make_safe`</b>: Whether the `params` should be constrained. If True,
+    `from_params` will apply default parameter transforms. If False, no
+    parameter transforms will be applied.
+*  <b>`**kwargs`</b>: dict of parameters for the distribution.
+
+##### Returns:
+
+  A distribution parameterized by possibly transformed parameters in
+  `kwargs`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `make_safe` is `True` but `_safe_transforms` is not
+    implemented directly for `cls`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.get_batch_shape()` {#Mixture.get_batch_shape}
+
+Shape of a single sample from a single event index as a `TensorShape`.
+
+Same meaning as `batch_shape`. May be only partially defined.
+
+##### Returns:
+
+
+*  <b>`batch_shape`</b>: `TensorShape`, possibly unknown.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.get_event_shape()` {#Mixture.get_event_shape}
+
+Shape of a single sample from a single batch as a `TensorShape`.
+
+Same meaning as `event_shape`. May be only partially defined.
+
+##### Returns:
+
+
+*  <b>`event_shape`</b>: `TensorShape`, possibly unknown.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.is_continuous` {#Mixture.is_continuous}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.is_reparameterized` {#Mixture.is_reparameterized}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_cdf(value, name='log_cdf')` {#Mixture.log_cdf}
+
+Log cumulative distribution function.
+
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`logcdf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_pdf(value, name='log_pdf')` {#Mixture.log_pdf}
+
+Log probability density function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`log_prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if not `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_pmf(value, name='log_pmf')` {#Mixture.log_pmf}
+
+Log probability mass function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`log_pmf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_prob(value, name='log_prob')` {#Mixture.log_prob}
+
+Log probability density/mass function (depending on `is_continuous`).
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`log_prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.log_survival_function(value, name='log_survival_function')` {#Mixture.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.mean(name='mean')` {#Mixture.mean}
+
+Mean.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.mode(name='mode')` {#Mixture.mode}
+
+Mode.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.name` {#Mixture.name}
+
+Name prepended to all ops created by this `Distribution`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.num_components` {#Mixture.num_components}
+
+
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.param_shapes(cls, sample_shape, name='DistributionParamShapes')` {#Mixture.param_shapes}
+
+Shapes of parameters given the desired shape of a call to `sample()`.
+
+Subclasses should override static method `_param_shapes`.
+
+##### Args:
+
+
+*  <b>`sample_shape`</b>: `Tensor` or python list/tuple. Desired shape of a call to
+    `sample()`.
+*  <b>`name`</b>: name to prepend ops with.
+
+##### Returns:
+
+  `dict` of parameter name to `Tensor` shapes.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.param_static_shapes(cls, sample_shape)` {#Mixture.param_static_shapes}
+
+param_shapes with static (i.e. TensorShape) shapes.
+
+##### Args:
+
+
+*  <b>`sample_shape`</b>: `TensorShape` or python list/tuple. Desired shape of a call
+    to `sample()`.
+
+##### Returns:
+
+  `dict` of parameter name to `TensorShape`.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: if `sample_shape` is a `TensorShape` and is not fully defined.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.parameters` {#Mixture.parameters}
+
+Dictionary of parameters used by this `Distribution`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.pdf(value, name='pdf')` {#Mixture.pdf}
+
+Probability density function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if not `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.pmf(value, name='pmf')` {#Mixture.pmf}
+
+Probability mass function.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`pmf`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+##### Raises:
+
+
+*  <b>`AttributeError`</b>: if `is_continuous`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.prob(value, name='prob')` {#Mixture.prob}
+
+Probability density/mass function (depending on `is_continuous`).
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+
+*  <b>`prob`</b>: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+    values of type `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.sample(sample_shape=(), seed=None, name='sample')` {#Mixture.sample}
+
+Generate samples of the specified shape.
+
+Note that a call to `sample()` without arguments will generate a single
+sample.
+
+##### Args:
+
+
+*  <b>`sample_shape`</b>: 0D or 1D `int32` `Tensor`. Shape of the generated samples.
+*  <b>`seed`</b>: Python integer seed for RNG
+*  <b>`name`</b>: name to give to the op.
+
+##### Returns:
+
+
+*  <b>`samples`</b>: a `Tensor` with prepended dimensions `sample_shape`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.sample_n(n, seed=None, name='sample_n')` {#Mixture.sample_n}
+
+Generate `n` samples.
+
+##### Args:
+
+
+*  <b>`n`</b>: `Scalar` `Tensor` of type `int32` or `int64`, the number of
+    observations to sample.
+*  <b>`seed`</b>: Python integer seed for RNG
+*  <b>`name`</b>: name to give to the op.
+
+##### Returns:
+
+
+*  <b>`samples`</b>: a `Tensor` with a prepended dimension (n,).
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `n` is not an integer type.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.std(name='std')` {#Mixture.std}
+
+Standard deviation.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.survival_function(value, name='survival_function')` {#Mixture.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.validate_args` {#Mixture.validate_args}
+
+Python boolean indicating whether possibly expensive checks are enabled.
+
+
+- - -
+
+#### `tf.contrib.distributions.Mixture.variance(name='variance')` {#Mixture.variance}
+
+Variance.
+
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.losses.sparse_softmax_cross_entropy.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.losses.sparse_softmax_cross_entropy.md
new file mode 100644
index 00000000000..f57e5569ff4
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.losses.sparse_softmax_cross_entropy.md
@@ -0,0 +1,29 @@
+### `tf.contrib.losses.sparse_softmax_cross_entropy(logits, labels, weight=1.0, scope=None)` {#sparse_softmax_cross_entropy}
+
+Cross-entropy loss using tf.nn.sparse_softmax_cross_entropy_with_logits.
+
+`weight` acts as a coefficient for the loss. If a scalar is provided,
+then the loss is simply scaled by the given value. If `weight` is a
+tensor of size [`batch_size`], then the loss weights apply to each
+corresponding sample.
+
+##### Args:
+
+
+*  <b>`logits`</b>: [batch_size, num_classes] logits outputs of the network.
+*  <b>`labels`</b>: [batch_size, 1] or [batch_size] target labels of dtype `int32` or
+    `int64` in the range `[0, num_classes)`.
+*  <b>`weight`</b>: Coefficients for the loss. The tensor must be a scalar or a tensor
+    of shape [batch_size] or [batch_size, 1].
+*  <b>`scope`</b>: the scope for the operations performed in computing the loss.
+
+##### Returns:
+
+  A scalar `Tensor` representing the loss value.
+
+##### Raises:
+
+
+*  <b>`ValueError`</b>: If the shapes of logits, labels, and weight are incompatible, or
+    if `weight` is None.
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.run_metric.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.run_metric.md
deleted file mode 100644
index 8009454fec3..00000000000
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.run_metric.md
+++ /dev/null
@@ -1,21 +0,0 @@
-### `tf.contrib.metrics.run_metric(metric, predictions, targets, weights=None)` {#run_metric}
-
-Runs a single metric.
-
-This function runs metric on given predictions and targets. weights will be
-used if metric contains 'weights' in its argument.
-
-##### Args:
-
-
-*  <b>`metric`</b>: A function that evaluates targets given predictions.
-*  <b>`predictions`</b>: A `Tensor` of arbitrary shape.
-*  <b>`targets`</b>: A `Tensor` of the same shape as `predictions`.
-*  <b>`weights`</b>: A set of weights that can be used in metric function to compute
-    weighted result.
-
-##### Returns:
-
-
-*  <b>`result`</b>: result returned by metric function.
-
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.training.bucket_by_sequence_length.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.training.bucket_by_sequence_length.md
new file mode 100644
index 00000000000..85b5f08c3ca
--- /dev/null
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.training.bucket_by_sequence_length.md
@@ -0,0 +1,53 @@
+### `tf.contrib.training.bucket_by_sequence_length(input_length, tensors, batch_size, bucket_boundaries, num_threads=1, capacity=32, shapes=None, dynamic_pad=False, allow_smaller_final_batch=False, keep_input=None, shared_name=None, name=None)` {#bucket_by_sequence_length}
+
+Lazy bucketing of inputs according to their length.
+
+This method calls `tf.contrib.training.bucket` under the hood, after first
+subdividing the bucket boundaries into separate buckets and identifying which
+bucket the given `input_length` belongs to.  See the documentation for
+`which_bucket` for details of the other arguments.
+
+##### Args:
+
+
+*  <b>`input_length`</b>: `int32` scalar `Tensor`, the sequence length of tensors.
+*  <b>`tensors`</b>: The list or dictionary of tensors, representing a single element,
+    to bucket.  Nested lists are not supported.
+*  <b>`batch_size`</b>: The new batch size pulled from the queue
+    (python int or int32 scalar).
+*  <b>`bucket_boundaries`</b>: int list, increasing non-negative numbers.
+    The edges of the buckets to use when bucketing tensors.  Two extra buckets
+    are created, one for `input_length < bucket_boundaries[0]` and
+    one for `input_length >= bucket_boundaries[-1]`.
+*  <b>`num_threads`</b>: An integer.  The number of threads enqueuing `tensors`.
+*  <b>`capacity`</b>: An integer. The maximum number of minibatches in the top queue,
+    and also the maximum number of elements within each bucket.
+*  <b>`shapes`</b>: (Optional) The shapes for each example.  Defaults to the
+    inferred shapes for `tensors`.
+*  <b>`dynamic_pad`</b>: Boolean.  Allow variable dimensions in input shapes.
+    The given dimensions are padded upon dequeue so that tensors within a
+    batch have the same shapes.
+*  <b>`allow_smaller_final_batch`</b>: (Optional) Boolean. If `True`, allow the final
+    batches to be smaller if there are insufficient items left in the queues.
+*  <b>`keep_input`</b>: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
+    controls whether the input is added to the queue or not.  If it evaluates to
+    `True`, then `tensors` are added to the bucket; otherwise they are
+    dropped.  This tensor essentially acts as a filtering mechanism.
+    The default behavior is to assume `keep_input=True`.
+*  <b>`shared_name`</b>: (Optional). If set, the queues will be shared under the given
+    name across multiple sessions.
+*  <b>`name`</b>: (Optional) A name for the operations.
+
+##### Returns:
+
+  A tuple `(sequence_length, outputs)` where `sequence_length` is
+  a 1-D `Tensor` of size `batch_size` and `outputs` is a list or dictionary
+  of batched, bucketed, outputs corresponding to elements of `tensors`.
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: if `bucket_boundaries` is not a list of python integers.
+*  <b>`ValueError`</b>: if `bucket_boundaries` is empty or contains non-increasing
+    values.
+
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.batch.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.batch.md
index e1cd8aa7c07..9112cf531d4 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.batch.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.batch.md
@@ -15,7 +15,7 @@ with shape `[batch_size, x, y, z]`.
 
 If `enqueue_many` is `True`, `tensors` is assumed to represent a batch of
 examples, where the first dimension is indexed by example, and all members of
-`tensor_list` should have the same size in the first dimension.  If an input
+`tensors` should have the same size in the first dimension.  If an input
 tensor has shape `[*, x, y, z]`, the output will have shape `[batch_size, x,
 y, z]`.  The `capacity` argument controls the how long the prefetching is
 allowed to grow the queues.
@@ -51,11 +51,11 @@ operations that depend on fixed batch_size would fail.
 
 *  <b>`tensors`</b>: The list or dictionary of tensors to enqueue.
 *  <b>`batch_size`</b>: The new batch size pulled from the queue.
-*  <b>`num_threads`</b>: The number of threads enqueuing `tensor_list`.
+*  <b>`num_threads`</b>: The number of threads enqueuing `tensors`.
 *  <b>`capacity`</b>: An integer. The maximum number of elements in the queue.
-*  <b>`enqueue_many`</b>: Whether each tensor in `tensor_list` is a single example.
+*  <b>`enqueue_many`</b>: Whether each tensor in `tensors` is a single example.
 *  <b>`shapes`</b>: (Optional) The shapes for each example.  Defaults to the
-    inferred shapes for `tensor_list`.
+    inferred shapes for `tensors`.
 *  <b>`dynamic_pad`</b>: Boolean.  Allow variable dimensions in input shapes.
     The given dimensions are padded upon dequeue so that tensors within a
     batch have the same shapes.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md
index 080df4c6575..2faa75c348a 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md
@@ -77,6 +77,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -233,6 +239,16 @@ Rate parameter.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -313,6 +329,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.Poisson.log_survival_function(value, name='log_survival_function')` {#Poisson.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Poisson.mean(name='mean')` {#Poisson.mean}
@@ -504,6 +549,32 @@ Generate `n` samples.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.Poisson.survival_function(value, name='survival_function')` {#Poisson.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.Poisson.validate_args` {#Poisson.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md
index 96b878fcfe6..0a37a3607bd 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md
@@ -128,6 +128,12 @@ independent distributions of this kind the instance represents.
 
 Cumulative distribution function.
 
+Given random variable `X`, the cumulative distribution function `cdf` is:
+
+```
+cdf(x) := P[X <= x]
+```
+
 ##### Args:
 
 
@@ -298,6 +304,16 @@ Same meaning as `event_shape`. May be only partially defined.
 
 Log cumulative distribution function.
 
+Given random variable `X`, the log cumulative distribution function `log_cdf` is:
+
+```
+log_cdf(x) := Log[ P[X <= x] ]
+```
+
+Often, a numerical approximation can be used for `log_cdf(x)` that yields
+a more accurate answer than simply taking the logarithm of the `cdf` when
+`x << -1`.
+
 ##### Args:
 
 
@@ -385,6 +401,35 @@ Log probability density/mass function (depending on `is_continuous`).
     values of type `self.dtype`.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartFull.log_survival_function(value, name='log_survival_function')` {#WishartFull.log_survival_function}
+
+Log survival function.
+
+Given random variable `X`, the log survival function is defined:
+
+```
+log_survival_function(x) = Log[ P[X > x] ]
+                         = Log[ 1 - P[X <= x] ]
+                         = Log[ 1 - cdf(x) ]
+```
+
+Typically, different numerical approximations can be used for the log
+survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartFull.mean(name='mean')` {#WishartFull.mean}
@@ -597,6 +642,32 @@ Wishart distribution scale matrix as an OperatorPD.
 Standard deviation.
 
 
+- - -
+
+#### `tf.contrib.distributions.WishartFull.survival_function(value, name='survival_function')` {#WishartFull.survival_function}
+
+Survival function.
+
+Given random variable `X`, the survival function is defined:
+
+```
+survival_function(x) = P[X > x]
+                     = 1 - P[X <= x]
+                     = 1 - cdf(x).
+```
+
+##### Args:
+
+
+*  <b>`value`</b>: `float` or `double` `Tensor`.
+*  <b>`name`</b>: The name to give this op.
+
+##### Returns:
+
+  `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
+    `self.dtype`.
+
+
 - - -
 
 #### `tf.contrib.distributions.WishartFull.validate_args` {#WishartFull.validate_args}
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md
index f0ce2b4f54c..c31d3fda13b 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md
@@ -51,8 +51,6 @@ Input of `fit` and `evaluate` should have following features,
     Both features' `value` must be a `SparseTensor`.
   - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
     whose `value` is a `Tensor`.
-  - if `feature_columns` is `None`, then `input` must contain only real
-    valued `Tensor`.
 - - -
 
 #### `tf.contrib.learn.DNNRegressor.__init__(hidden_units, feature_columns, model_dir=None, weight_column_name=None, optimizer=None, activation_fn=relu, dropout=None, gradient_clip_norm=None, enable_centered_bias=True, config=None)` {#DNNRegressor.__init__}
@@ -143,7 +141,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.TensorFlowRNNClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.TensorFlowRNNClassifier.md
index 4fb50c6b59c..cc2aabb8808 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.TensorFlowRNNClassifier.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.TensorFlowRNNClassifier.md
@@ -98,7 +98,7 @@ Exports inference graph into given dir. (deprecated arguments)
 
 SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-09-23.
 Instructions for updating:
-The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False &  be removed altogether.
+The signature of the input_fn accepted by export is changing to be consistent with what's used by tf.Learn Estimator's train/evaluate. input_fn and input_feature_key will become required args, and use_deprecated_input_fn will default to False and be removed altogether.
 
     Args:
       export_dir: A string containing a directory to write the exported graph
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.raw_rnn.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.raw_rnn.md
index d0055a8a438..bd2c027ca43 100644
--- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.raw_rnn.md
+++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.nn.raw_rnn.md
@@ -1,4 +1,4 @@
-### `tf.nn.raw_rnn(cell, loop_fn, initial_state, parallel_iterations=None, swap_memory=False, scope=None)` {#raw_rnn}
+### `tf.nn.raw_rnn(cell, loop_fn, parallel_iterations=None, swap_memory=False, scope=None)` {#raw_rnn}
 
 Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`.
 
@@ -16,16 +16,18 @@ Instead of working with `Tensor` objects, most operations work with
 `TensorArray` objects directly.
 
 The operation of `raw_rnn`, in pseudo-code, is basically the following:
+
 ```
-emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
 time = tf.constant(0, dtype=tf.int32)
-(finished, next_input, _, loop_state) = loop_fn(
-    time=time, cell_output=None, loop_state=None)
+(finished, next_input, initial_state, _, loop_state) = loop_fn(
+    time=time, cell_output=None, cell_state=None, loop_state=None)
+emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
 state = initial_state
 while not all(finished):
-  (output, next_state) = cell(next_input, state)
-  (next_finished, next_input, emit, loop_state) = loop_fn(
-      time=time + 1, cell_output=output, loop_state=loop_state)
+  (output, cell_state) = cell(next_input, state)
+  (next_finished, next_input, next_state, emit, loop_state) = loop_fn(
+      time=time + 1, cell_output=output, cell_state=cell_state,
+      loop_state=loop_state)
   # Emit zeros and copy forward state for minibatch entries that are finished.
   state = tf.select(finished, state, next_state)
   emit = tf.select(finished, tf.zeros_like(emit), emit)
@@ -49,8 +51,14 @@ sequence_length = tf.placeholder(shape=(batch_size,), dtype=tf.int32)
 inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
 inputs_ta = inputs_ta.unpack(inputs)
 
-def loop_fn(time, cell_output, loop_state):
+cell = tf.nn.rnn_cell.LSTMCell(num_units)
+
+def loop_fn(time, cell_output, cell_state, loop_state):
   emit_output = cell_output  # == None for time == 0
+  if cell_output is None:  # time == 0
+    next_cell_state = cell.zero_state(batch_size, tf.float32)
+  else:
+    next_cell_state = cell_state
   elements_finished = (time >= sequence_length)
   finished = tf.reduce_all(elements_finished)
   next_input = tf.cond(
@@ -58,11 +66,10 @@ def loop_fn(time, cell_output, loop_state):
       lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
       lambda: inputs_ta.read(time))
   next_loop_state = None
-  return (elements_finished, next_input, emit_output, next_loop_state)
+  return (elements_finished, next_input, next_cell_state,
+          emit_output, next_loop_state)
 
-cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-initial_state = cell.zero_state(batch_size, tf.float32)
-outputs_ta, final_state, _ = raw_rnn(cell, loop_fn, initial_state)
+outputs_ta, final_state, _ = raw_rnn(cell, loop_fn)
 outputs = outputs_ta.pack()
 ```
 
@@ -70,25 +77,46 @@ outputs = outputs_ta.pack()
 
 
 *  <b>`cell`</b>: An instance of RNNCell.
-*  <b>`loop_fn`</b>: A callable that takes inputs `(time, cell_output, loop_state)` and
-    returns the tuple `(finished, next_input, emit_output, next_loop_state)`.
+*  <b>`loop_fn`</b>: A callable that takes inputs
+    `(time, cell_output, cell_state, loop_state)`
+    and returns the tuple
+    `(finished, next_input, next_cell_state, emit_output, next_loop_state)`.
     Here `time` is an int32 scalar `Tensor`, `cell_output` is a
     `Tensor` or (possibly nested) tuple of tensors as determined by
-    `cell.output_size`.  In addition, `finished` is a boolean `Tensor` of
-    shape `[batch_size]`, `next_input` is the next input to feed to `cell`,
-    and `emit_output` is the output to store for this iteration.  Note that
-    `emit_output` should be a `Tensor` or (possibly nested) tuple of tensors
-    with shapes and structure matching `cell.output_size` and `cell_output`
-    above.  The parameter `loop_state` and output `next_loop_state` may be
-    either a single or (possibly nested) tuple of tensors.  This paramter
+    `cell.output_size`, and `cell_state` is a `Tensor`
+    or (possibly nested) tuple of tensors, as determined by the `loop_fn`
+    on its first call (and should match `cell.state_size`).
+    The outputs are: `finished`: a boolean `Tensor` of
+    shape `[batch_size]`; `next_input`: the next input to feed to `cell`;
+    `next_cell_state`: the next state to feed to `cell`;
+    and `emit_output`: the output to store for this iteration.
+
+    Note that `emit_output` should be a `Tensor` or (possibly nested)
+    tuple of tensors with shapes and structure matching `cell.output_size`
+    and `cell_output` above.  The parameter `cell_state` and output
+    `next_cell_state` may be either a single or (possibly nested) tuple
+    of tensors.  The parameter `loop_state` and
+    output `next_loop_state` may be either a single or (possibly nested) tuple
+    of `Tensor` and `TensorArray` objects.  This last parameter
     may be ignored by `loop_fn` and the return value may be `None`.  If it
     is not `None`, then the `loop_state` will be propagated through the RNN
     loop, for use purely by `loop_fn` to keep track of its own state.
     The `next_loop_state` parameter returned may be `None`.
 
     The first call to `loop_fn` will be `time = 0`, `cell_output = None`,
-    and `loop_state = None`.  Its `emit_output` value in this case may be
-    either `None` or a (possibly nested) tuple structure of Tensors, e.g.,
+    `cell_state = None`, and `loop_state = None`.  For this call:
+    The `next_cell_state` value should be the value with which to initialize
+    the cell's state.  It may be a final state from a previous RNN or it
+    may be the output of `cell.zero_state()`.  It should be a
+    (possibly nested) tuple structure of tensors.
+    If `cell.state_size` is an integer, this must be
+    a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
+    If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
+    appropriate type and shape `[batch_size] + cell.state_size`.
+    If `cell.state_size` is a (possibly nested) tuple of ints or
+    `TensorShape`, this will be a tuple having the corresponding shapes.
+    The `emit_output` value may be either `None` or a (possibly nested)
+    tuple structure of tensors, e.g.,
     `(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`.
     If this first `emit_output` return value is `None`,
     then the `emit_ta` result of `raw_rnn` will have the same structure and
@@ -99,13 +127,6 @@ outputs = outputs_ta.pack()
     consistent across all time steps.
 
 
-*  <b>`initial_state`</b>: An initial state for the RNN.
-    If `cell.state_size` is an integer, this must be
-    a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
-    If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
-    appropriate type and shape `[batch_size] + cell.state_size`.
-    If `cell.state_size` is a (possibly nested) tuple of ints or
-    `TensorShape`, this will be a tuple having the corresponding shapes.
 *  <b>`parallel_iterations`</b>: (Default: 32).  The number of iterations to run in
     parallel.  Those operations which do not have any temporal dependency
     and can be run in parallel, will be.  This parameter trades off
@@ -121,26 +142,25 @@ outputs = outputs_ta.pack()
 
   A tuple `(emit_ta, final_state, final_loop_state)` where:
 
-    `emit_ta`: The RNN output `TensorArray`.
-       If `loop_fn` returns a (possibly nested) set of Tensors for
-       `emit_output` during initialization, (inputs `time = 0`,
-       `cell_output = None`, and `loop_state = None`), then `emit_ta` will
-       have the same structure, dtypes, and shapes as `emit_output` instead.
-       If `loop_fn` returns `emit_output = None` during this call,
-       the structure of `cell.output_size` is used:
+  `emit_ta`: The RNN output `TensorArray`.
+     If `loop_fn` returns a (possibly nested) set of Tensors for
+     `emit_output` during initialization, (inputs `time = 0`,
+     `cell_output = None`, `cell_state = None`, and `loop_state = None`), then `emit_ta` will
+     have the same structure, dtypes, and shapes as `emit_output` instead.
+     If `loop_fn` returns `emit_output = None` during this call,
+     the structure of `cell.output_size` is used:
+     If `cell.output_size` is a (possibly nested) tuple of integers
+     or `TensorShape` objects, then `emit_ta` will be a tuple having the
+     same structure as `cell.output_size`, containing TensorArrays whose
+     elements' shapes correspond to the shape data in `cell.output_size`.
 
-       If `cell.output_size` is a (possibly nested) tuple of integers
-       or `TensorShape` objects, then `emit_ta` will be a tuple having the
-       same structure as `cell.output_size`, containing TensorArrays whose
-       elements' shapes correspond to the shape data in `cell.output_size`.
+  `final_state`: The final cell state.  If `cell.state_size` is an int, this
+    will be shaped `[batch_size, cell.state_size]`.  If it is a
+    `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
+    If it is a (possibly nested) tuple of ints or `TensorShape`, this will
+    be a tuple having the corresponding shapes.
 
-    `final_state`: The final cell state.  If `cell.state_size` is an int, this
-      will be shaped `[batch_size, cell.state_size]`.  If it is a
-      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
-      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
-      be a tuple having the corresponding shapes.
-
-    `final_loop_state`: The final loop state as returned by `loop_fn`.
+  `final_loop_state`: The final loop state as returned by `loop_fn`.
 
 ##### Raises:
 
diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md
index 3bd88eedacb..3de9afccb5f 100644
--- a/tensorflow/g3doc/api_docs/python/index.md
+++ b/tensorflow/g3doc/api_docs/python/index.md
@@ -632,6 +632,7 @@
   * [`InverseGammaTensor`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#InverseGammaTensor)
   * [`LaplaceTensor`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#LaplaceTensor)
   * [`MeanValue`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#MeanValue)
+  * [`MixtureTensor`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#MixtureTensor)
   * [`MultinomialTensor`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#MultinomialTensor)
   * [`MultivariateNormalCholeskyTensor`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#MultivariateNormalCholeskyTensor)
   * [`MultivariateNormalDiagPlusVDVTTensor`](../../api_docs/python/contrib.bayesflow.stochastic_tensor.md#MultivariateNormalDiagPlusVDVTTensor)
@@ -671,6 +672,7 @@
   * [`InverseGamma`](../../api_docs/python/contrib.distributions.md#InverseGamma)
   * [`kl`](../../api_docs/python/contrib.distributions.md#kl)
   * [`Laplace`](../../api_docs/python/contrib.distributions.md#Laplace)
+  * [`Mixture`](../../api_docs/python/contrib.distributions.md#Mixture)
   * [`Multinomial`](../../api_docs/python/contrib.distributions.md#Multinomial)
   * [`MultivariateNormalCholesky`](../../api_docs/python/contrib.distributions.md#MultivariateNormalCholesky)
   * [`MultivariateNormalDiag`](../../api_docs/python/contrib.distributions.md#MultivariateNormalDiag)
@@ -890,6 +892,7 @@
   * [`mean_squared_error`](../../api_docs/python/contrib.losses.md#mean_squared_error)
   * [`sigmoid_cross_entropy`](../../api_docs/python/contrib.losses.md#sigmoid_cross_entropy)
   * [`softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#softmax_cross_entropy)
+  * [`sparse_softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#sparse_softmax_cross_entropy)
   * [`sum_of_pairwise_squares`](../../api_docs/python/contrib.losses.md#sum_of_pairwise_squares)
   * [`sum_of_squares`](../../api_docs/python/contrib.losses.md#sum_of_squares)
 
@@ -907,7 +910,6 @@
   * [`aggregate_metrics`](../../api_docs/python/contrib.metrics.md#aggregate_metrics)
   * [`auc_using_histogram`](../../api_docs/python/contrib.metrics.md#auc_using_histogram)
   * [`confusion_matrix`](../../api_docs/python/contrib.metrics.md#confusion_matrix)
-  * [`run_metric`](../../api_docs/python/contrib.metrics.md#run_metric)
   * [`set_difference`](../../api_docs/python/contrib.metrics.md#set_difference)
   * [`set_intersection`](../../api_docs/python/contrib.metrics.md#set_intersection)
   * [`set_size`](../../api_docs/python/contrib.metrics.md#set_size)
@@ -932,6 +934,8 @@
 
 * **[Training (contrib)](../../api_docs/python/contrib.training.md)**:
   * [`batch_sequences_with_states`](../../api_docs/python/contrib.training.md#batch_sequences_with_states)
+  * [`bucket`](../../api_docs/python/contrib.training.md#bucket)
+  * [`bucket_by_sequence_length`](../../api_docs/python/contrib.training.md#bucket_by_sequence_length)
   * [`NextQueuedSequenceBatch`](../../api_docs/python/contrib.training.md#NextQueuedSequenceBatch)
   * [`SequenceQueueingStateSaver`](../../api_docs/python/contrib.training.md#SequenceQueueingStateSaver)
   * [`stratified_sample`](../../api_docs/python/contrib.training.md#stratified_sample)
diff --git a/tensorflow/g3doc/api_docs/python/io_ops.md b/tensorflow/g3doc/api_docs/python/io_ops.md
index af6479723a4..b5c799a0367 100644
--- a/tensorflow/g3doc/api_docs/python/io_ops.md
+++ b/tensorflow/g3doc/api_docs/python/io_ops.md
@@ -2434,7 +2434,7 @@ with shape `[batch_size, x, y, z]`.
 
 If `enqueue_many` is `True`, `tensors` is assumed to represent a batch of
 examples, where the first dimension is indexed by example, and all members of
-`tensor_list` should have the same size in the first dimension.  If an input
+`tensors` should have the same size in the first dimension.  If an input
 tensor has shape `[*, x, y, z]`, the output will have shape `[batch_size, x,
 y, z]`.  The `capacity` argument controls the how long the prefetching is
 allowed to grow the queues.
@@ -2470,11 +2470,11 @@ operations that depend on fixed batch_size would fail.
 
 *  <b>`tensors`</b>: The list or dictionary of tensors to enqueue.
 *  <b>`batch_size`</b>: The new batch size pulled from the queue.
-*  <b>`num_threads`</b>: The number of threads enqueuing `tensor_list`.
+*  <b>`num_threads`</b>: The number of threads enqueuing `tensors`.
 *  <b>`capacity`</b>: An integer. The maximum number of elements in the queue.
-*  <b>`enqueue_many`</b>: Whether each tensor in `tensor_list` is a single example.
+*  <b>`enqueue_many`</b>: Whether each tensor in `tensors` is a single example.
 *  <b>`shapes`</b>: (Optional) The shapes for each example.  Defaults to the
-    inferred shapes for `tensor_list`.
+    inferred shapes for `tensors`.
 *  <b>`dynamic_pad`</b>: Boolean.  Allow variable dimensions in input shapes.
     The given dimensions are padded upon dequeue so that tensors within a
     batch have the same shapes.
diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md
index 84a00b4e617..339181dcb9e 100644
--- a/tensorflow/g3doc/api_docs/python/math_ops.md
+++ b/tensorflow/g3doc/api_docs/python/math_ops.md
@@ -2133,7 +2133,7 @@ tf.complex_abs(x) ==> [5.25594902, 6.60492229]
 
 - - -
 
-### `tf.conj(input, name=None)` {#conj}
+### `tf.conj(x, name=None)` {#conj}
 
 Returns the complex conjugate of a complex number.
 
@@ -2146,20 +2146,25 @@ The complex conjugate returned by this operation is of the form \\(a - bj\\).
 
 For example:
 
-```
-# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
-```
+    # tensor 'x' is [-2.25 + 4.75j, 3.25 + 5.75j]
+    tf.conj(x) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+
+If `x` is real, it is returned unchanged.
 
 ##### Args:
 
 
-*  <b>`input`</b>: A `Tensor`. Must be one of the following types: `complex64`, `complex128`.
+*  <b>`x`</b>: `Tensor` to conjugate.  Must have numeric type.
 *  <b>`name`</b>: A name for the operation (optional).
 
 ##### Returns:
 
-  A `Tensor`. Has the same type as `input`.
+  A `Tensor` that is the conjugate of `x` (with the same type).
+
+##### Raises:
+
+
+*  <b>`TypeError`</b>: If `x` is not a numeric tensor.
 
 
 - - -
@@ -2211,11 +2216,12 @@ For example:
 tf.real(input) ==> [-2.25, 3.25]
 ```
 
+If `input` is already real, it is returned unchanged.
+
 ##### Args:
 
 
-*  <b>`input`</b>: A `Tensor`. Must be one of the following types: `complex64`,
-       `complex128`.
+*  <b>`input`</b>: A `Tensor`. Must have numeric type.
 *  <b>`name`</b>: A name for the operation (optional).
 
 ##### Returns:
diff --git a/tensorflow/g3doc/api_docs/python/nn.md b/tensorflow/g3doc/api_docs/python/nn.md
index b87f176e2bc..22fb0fdabb7 100644
--- a/tensorflow/g3doc/api_docs/python/nn.md
+++ b/tensorflow/g3doc/api_docs/python/nn.md
@@ -2091,7 +2091,7 @@ length(s) of the sequence(s) or completely unrolled if length(s) is not given.
 
 - - -
 
-### `tf.nn.raw_rnn(cell, loop_fn, initial_state, parallel_iterations=None, swap_memory=False, scope=None)` {#raw_rnn}
+### `tf.nn.raw_rnn(cell, loop_fn, parallel_iterations=None, swap_memory=False, scope=None)` {#raw_rnn}
 
 Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`.
 
@@ -2109,16 +2109,18 @@ Instead of working with `Tensor` objects, most operations work with
 `TensorArray` objects directly.
 
 The operation of `raw_rnn`, in pseudo-code, is basically the following:
+
 ```
-emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
 time = tf.constant(0, dtype=tf.int32)
-(finished, next_input, _, loop_state) = loop_fn(
-    time=time, cell_output=None, loop_state=None)
+(finished, next_input, initial_state, _, loop_state) = loop_fn(
+    time=time, cell_output=None, cell_state=None, loop_state=None)
+emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
 state = initial_state
 while not all(finished):
-  (output, next_state) = cell(next_input, state)
-  (next_finished, next_input, emit, loop_state) = loop_fn(
-      time=time + 1, cell_output=output, loop_state=loop_state)
+  (output, cell_state) = cell(next_input, state)
+  (next_finished, next_input, next_state, emit, loop_state) = loop_fn(
+      time=time + 1, cell_output=output, cell_state=cell_state,
+      loop_state=loop_state)
   # Emit zeros and copy forward state for minibatch entries that are finished.
   state = tf.select(finished, state, next_state)
   emit = tf.select(finished, tf.zeros_like(emit), emit)
@@ -2142,8 +2144,14 @@ sequence_length = tf.placeholder(shape=(batch_size,), dtype=tf.int32)
 inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
 inputs_ta = inputs_ta.unpack(inputs)
 
-def loop_fn(time, cell_output, loop_state):
+cell = tf.nn.rnn_cell.LSTMCell(num_units)
+
+def loop_fn(time, cell_output, cell_state, loop_state):
   emit_output = cell_output  # == None for time == 0
+  if cell_output is None:  # time == 0
+    next_cell_state = cell.zero_state(batch_size, tf.float32)
+  else:
+    next_cell_state = cell_state
   elements_finished = (time >= sequence_length)
   finished = tf.reduce_all(elements_finished)
   next_input = tf.cond(
@@ -2151,11 +2159,10 @@ def loop_fn(time, cell_output, loop_state):
       lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
       lambda: inputs_ta.read(time))
   next_loop_state = None
-  return (elements_finished, next_input, emit_output, next_loop_state)
+  return (elements_finished, next_input, next_cell_state,
+          emit_output, next_loop_state)
 
-cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-initial_state = cell.zero_state(batch_size, tf.float32)
-outputs_ta, final_state, _ = raw_rnn(cell, loop_fn, initial_state)
+outputs_ta, final_state, _ = raw_rnn(cell, loop_fn)
 outputs = outputs_ta.pack()
 ```
 
@@ -2163,25 +2170,46 @@ outputs = outputs_ta.pack()
 
 
 *  <b>`cell`</b>: An instance of RNNCell.
-*  <b>`loop_fn`</b>: A callable that takes inputs `(time, cell_output, loop_state)` and
-    returns the tuple `(finished, next_input, emit_output, next_loop_state)`.
+*  <b>`loop_fn`</b>: A callable that takes inputs
+    `(time, cell_output, cell_state, loop_state)`
+    and returns the tuple
+    `(finished, next_input, next_cell_state, emit_output, next_loop_state)`.
     Here `time` is an int32 scalar `Tensor`, `cell_output` is a
     `Tensor` or (possibly nested) tuple of tensors as determined by
-    `cell.output_size`.  In addition, `finished` is a boolean `Tensor` of
-    shape `[batch_size]`, `next_input` is the next input to feed to `cell`,
-    and `emit_output` is the output to store for this iteration.  Note that
-    `emit_output` should be a `Tensor` or (possibly nested) tuple of tensors
-    with shapes and structure matching `cell.output_size` and `cell_output`
-    above.  The parameter `loop_state` and output `next_loop_state` may be
-    either a single or (possibly nested) tuple of tensors.  This paramter
+    `cell.output_size`, and `cell_state` is a `Tensor`
+    or (possibly nested) tuple of tensors, as determined by the `loop_fn`
+    on its first call (and should match `cell.state_size`).
+    The outputs are: `finished`, a boolean `Tensor` of
+    shape `[batch_size]`, `next_input`: the next input to feed to `cell`,
+    `next_cell_state`: the next state to feed to `cell`,
+    and `emit_output`: the output to store for this iteration.
+
+    Note that `emit_output` should be a `Tensor` or (possibly nested)
+    tuple of tensors with shapes and structure matching `cell.output_size`
+    and `cell_output` above.  The parameter `cell_state` and output
+    `next_cell_state` may be either a single or (possibly nested) tuple
+    of tensors.  The parameter `loop_state` and
+    output `next_loop_state` may be either a single or (possibly nested) tuple
+    of `Tensor` and `TensorArray` objects.  This last parameter
     may be ignored by `loop_fn` and the return value may be `None`.  If it
     is not `None`, then the `loop_state` will be propagated through the RNN
     loop, for use purely by `loop_fn` to keep track of its own state.
     The `next_loop_state` parameter returned may be `None`.
 
     The first call to `loop_fn` will be `time = 0`, `cell_output = None`,
-    and `loop_state = None`.  Its `emit_output` value in this case may be
-    either `None` or a (possibly nested) tuple structure of Tensors, e.g.,
+    `cell_state = None`, and `loop_state = None`.  For this call:
+    The `next_cell_state` value should be the value with which to initialize
+    the cell's state.  It may be a final state from a previous RNN or it
+    may be the output of `cell.zero_state()`.  It should be a
+    (possibly nested) tuple structure of tensors.
+    If `cell.state_size` is an integer, this must be
+    a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
+    If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
+    appropriate type and shape `[batch_size] + cell.state_size`.
+    If `cell.state_size` is a (possibly nested) tuple of ints or
+    `TensorShape`, this will be a tuple having the corresponding shapes.
+    The `emit_output` value may be either `None` or a (possibly nested)
+    tuple structure of tensors, e.g.,
     `(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`.
     If this first `emit_output` return value is `None`,
     then the `emit_ta` result of `raw_rnn` will have the same structure and
@@ -2192,13 +2220,6 @@ outputs = outputs_ta.pack()
     consistent across all time steps.
 
 
-*  <b>`initial_state`</b>: An initial state for the RNN.
-    If `cell.state_size` is an integer, this must be
-    a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
-    If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
-    appropriate type and shape `[batch_size] + cell.state_size`.
-    If `cell.state_size` is a (possibly nested) tuple of ints or
-    `TensorShape`, this will be a tuple having the corresponding shapes.
 *  <b>`parallel_iterations`</b>: (Default: 32).  The number of iterations to run in
     parallel.  Those operations which do not have any temporal dependency
     and can be run in parallel, will be.  This parameter trades off
@@ -2214,26 +2235,25 @@ outputs = outputs_ta.pack()
 
   A tuple `(emit_ta, final_state, final_loop_state)` where:
 
-    `emit_ta`: The RNN output `TensorArray`.
-       If `loop_fn` returns a (possibly nested) set of Tensors for
-       `emit_output` during initialization, (inputs `time = 0`,
-       `cell_output = None`, and `loop_state = None`), then `emit_ta` will
-       have the same structure, dtypes, and shapes as `emit_output` instead.
-       If `loop_fn` returns `emit_output = None` during this call,
-       the structure of `cell.output_size` is used:
+  `emit_ta`: The RNN output `TensorArray`.
+     If `loop_fn` returns a (possibly nested) set of Tensors for
+     `emit_output` during initialization (inputs `time = 0`,
+     `cell_output = None`, `cell_state = None`, `loop_state = None`), `emit_ta`
+     will have the same structure, dtypes, and shapes as `emit_output` instead.
+     If `loop_fn` returns `emit_output = None` during this call,
+     the structure of `cell.output_size` is used:
+     If `cell.output_size` is a (possibly nested) tuple of integers
+     or `TensorShape` objects, then `emit_ta` will be a tuple having the
+     same structure as `cell.output_size`, containing TensorArrays whose
+     elements' shapes correspond to the shape data in `cell.output_size`.
 
-       If `cell.output_size` is a (possibly nested) tuple of integers
-       or `TensorShape` objects, then `emit_ta` will be a tuple having the
-       same structure as `cell.output_size`, containing TensorArrays whose
-       elements' shapes correspond to the shape data in `cell.output_size`.
+  `final_state`: The final cell state.  If `cell.state_size` is an int, this
+    will be shaped `[batch_size, cell.state_size]`.  If it is a
+    `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
+    If it is a (possibly nested) tuple of ints or `TensorShape`, this will
+    be a tuple having the corresponding shapes.
 
-    `final_state`: The final cell state.  If `cell.state_size` is an int, this
-      will be shaped `[batch_size, cell.state_size]`.  If it is a
-      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
-      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
-      be a tuple having the corresponding shapes.
-
-    `final_loop_state`: The final loop state as returned by `loop_fn`.
+  `final_loop_state`: The final loop state as returned by `loop_fn`.
 
 ##### Raises:
 
diff --git a/tensorflow/g3doc/get_started/os_setup.md b/tensorflow/g3doc/get_started/os_setup.md
index fe8e0137676..6e20b3e11ca 100644
--- a/tensorflow/g3doc/get_started/os_setup.md
+++ b/tensorflow/g3doc/get_started/os_setup.md
@@ -734,22 +734,6 @@ Every time you change the Cuda library paths you need to run this step again bef
 you invoke the bazel build command. For the cuDNN libraries, use '6.5' for R2, '7.0'
 for R3, and '4.0.4' for R4-RC.
 
-#### Build your target with GPU support
-From the root of your source tree, run:
-
-```bash
-$ bazel build -c opt --config=cuda //tensorflow/cc:tutorials_example_trainer
-
-$ bazel-bin/tensorflow/cc/tutorials_example_trainer --use_gpu
-# Lots of output. This tutorial iteratively calculates the major eigenvalue of
-# a 2x2 matrix, on GPU. The last few lines look like this.
-000009/000005 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
-000006/000001 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
-000009/000009 lambda = 2.000000 x = [0.894427 -0.447214] y = [1.788854 -0.894427]
-```
-
-Note that "--config=cuda" is needed to enable the GPU support.
-
 #### Known issues
 
 * Although it is possible to build both Cuda and non-Cuda configs under the same
diff --git a/tensorflow/g3doc/how_tos/variables/index.md b/tensorflow/g3doc/how_tos/variables/index.md
index ef0d803cfd2..2227044243f 100644
--- a/tensorflow/g3doc/how_tos/variables/index.md
+++ b/tensorflow/g3doc/how_tos/variables/index.md
@@ -3,7 +3,7 @@
 When you train a model, you use [variables](../../api_docs/python/state_ops.md)
 to hold and update parameters.  Variables are in-memory buffers containing
 tensors.  They must be explicitly initialized and can be saved to disk during
-and after training. You can later restore saved values to exercise or analyse
+and after training. You can later restore saved values to exercise or analyze
 the model.
 
 This document references the following TensorFlow classes.  Follow the links to
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 4766a673a4d..f53f88c1c06 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -267,7 +267,6 @@ py_library(
 py_library(
     name = "extra_py_tests_deps",
     srcs_version = "PY2AND3",
-    deps = ["//tensorflow:tensorflow_py"],
 )
 
 py_library(
@@ -357,6 +356,7 @@ cuda_py_tests(
     srcs = ["framework/function_test.py"],
     additional_deps = [
         ":functional_ops_lib",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -913,6 +913,7 @@ cuda_py_tests(
     ],
     additional_deps = [
         ":device_lib",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -934,6 +935,7 @@ py_tests(
     ],
     additional_deps = [
         ":net_lib",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -1028,6 +1030,7 @@ py_test(
         ":framework",
         ":framework_test_lib",
         ":session",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -1037,6 +1040,7 @@ cuda_py_test(
     srcs = [
         "training/localhost_cluster_performance_test.py",
     ],
+    additional_deps = ["//tensorflow:tensorflow_py"],
 )
 
 py_library(
@@ -1079,6 +1083,7 @@ cuda_py_test(
     srcs = ["client/timeline_test.py"],
     additional_deps = [
         ":timeline",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -1117,12 +1122,18 @@ cuda_py_tests(
         "ops/math_grad_test.py",
         "ops/math_ops_test.py",
     ],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
 )
 
 cuda_py_test(
     name = "special_math_ops_test",
     size = "small",
     srcs = ["ops/special_math_ops_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
 )
 
 cuda_py_tests(
@@ -1133,12 +1144,18 @@ cuda_py_tests(
         "ops/nn_test.py",
         "ops/nn_xent_test.py",
     ],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
 )
 
 cuda_py_test(
     name = "image_ops_test",
     size = "small",
     srcs = ["ops/image_ops_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     data = [
         "//tensorflow/core:image_testdata",
     ],
@@ -1173,6 +1190,7 @@ cuda_py_tests(
     ],
     additional_deps = [
         ":training",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -1208,7 +1226,10 @@ cuda_py_test(
     name = "session_manager_test",
     size = "medium",  # TODO(irving): Can this be made small?
     srcs = ["training/session_manager_test.py"],
-    additional_deps = [":training"],
+    additional_deps = [
+        ":training",
+        "//tensorflow:tensorflow_py",
+    ],
     main = "training/session_manager_test.py",
 )
 
@@ -1220,6 +1241,7 @@ py_test(
     deps = [
         ":extra_py_tests_deps",
         ":training",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -1266,6 +1288,7 @@ py_tests(
     additional_deps = [
         ":summary",
         ":training",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -1324,6 +1347,7 @@ cuda_py_test(
     srcs = [
         "ops/accumulate_n_benchmark.py",
     ],
+    additional_deps = ["//tensorflow:tensorflow_py"],
     main = "ops/accumulate_n_benchmark.py",
 )
 
@@ -1332,5 +1356,6 @@ cuda_py_test(
     srcs = [
         "ops/batch_norm_benchmark.py",
     ],
+    additional_deps = ["//tensorflow:tensorflow_py"],
     main = "ops/batch_norm_benchmark.py",
 )
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index 0d53f17af71..1fab0f2f0d1 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -69,6 +69,7 @@ cuda_py_test(
     additional_deps = [
         ":debug_data",
         ":debug_utils",
+        "//tensorflow:tensorflow_py",
         "//tensorflow/python:framework",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:session",
diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index 28681c66970..ca2a11db20c 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -456,6 +456,17 @@ def MakeNdarray(tensor):
 
   if tensor.tensor_content:
     return np.fromstring(tensor.tensor_content, dtype=dtype).reshape(shape)
+  elif tensor_dtype == dtypes.float16:
+    # the half_val field of the TensorProto stores the binary representation
+    # of the fp16: we need to reinterpret this as a proper float16
+    if len(tensor.half_val) == 1:
+      tmp = np.array(tensor.half_val[0], dtype=np.uint16)
+      tmp.dtype = np.float16
+      return np.repeat(tmp, num_elements).reshape(shape)
+    else:
+      tmp = np.fromiter(tensor.half_val, dtype=np.uint16)
+      tmp.dtype = np.float16
+      return tmp.reshape(shape)
   elif tensor_dtype == dtypes.float32:
     if len(tensor.float_val) == 1:
       return np.repeat(np.array(tensor.float_val[0], dtype=dtype),
diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py
index c91847c0203..0a73abde15a 100644
--- a/tensorflow/python/framework/tensor_util_test.py
+++ b/tensorflow/python/framework/tensor_util_test.py
@@ -139,6 +139,23 @@ class TensorUtilTest(tf.test.TestCase):
                                     [10.0, 10.0, 10.0, 10.0],
                                     [10.0, 10.0, 10.0, 10.0]], dtype=nptype), a)
 
+  def testHalf(self):
+    t = tensor_util.make_tensor_proto(np.array([10.0, 20.0], dtype=np.float16))
+    self.assertProtoEquals("""
+      dtype: DT_HALF
+      tensor_shape {
+        dim {
+          size: 2
+        }
+      }
+      half_val: 18688
+      half_val: 19712
+      """, t)
+
+    a = tensor_util.MakeNdarray(t)
+    self.assertEquals(np.float16, a.dtype)
+    self.assertAllClose(np.array([10.0, 20.0], dtype=np.float16), a)
+
   def testInt(self):
     t = tensor_util.make_tensor_proto(10)
     self.assertProtoEquals("""
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 3342617cad5..8e4a03eeea1 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -432,7 +432,8 @@ class TensorFlowTestCase(googletest.TestCase):
       # absolute difference atol are added together to compare against
       # the absolute difference between a and b.  Here, we want to
       # print out which elements violate such conditions.
-      cond = np.abs(a - b) > atol + rtol * np.abs(b)
+      cond = np.logical_or(
+          np.abs(a - b) > atol + rtol * np.abs(b), np.isnan(a) != np.isnan(b))
       if a.ndim:
         x = a[np.where(cond)]
         y = b[np.where(cond)]
@@ -444,6 +445,7 @@ class TensorFlowTestCase(googletest.TestCase):
       print("not close rhs = ", y)
       print("not close dif = ", np.abs(x - y))
       print("not close tol = ", atol + rtol * np.abs(y))
+      print("dtype = %s, shape = %s" % (a.dtype, a.shape))
       np.testing.assert_allclose(a, b, rtol=rtol, atol=atol)
 
   def assertAllCloseAccordingToType(self, a, b, rtol=1e-6, atol=1e-6):
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f1e5b042ef7..65b8539e4a9 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -73,12 +73,18 @@ py_tests(
         "variables_test.py",
         "where_op_test.py",
     ],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
 )
 
 cuda_py_tests(
     name = "cast_op_test",
     size = "small",
     srcs = ["cast_op_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     tags = ["noasan"],
 )
 
@@ -86,6 +92,9 @@ cuda_py_test(
     name = "dense_update_ops_no_tsan_test",
     size = "small",
     srcs = ["dense_update_ops_no_tsan_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     tags = ["notsan"],
 )
 
@@ -93,6 +102,9 @@ tf_py_test(
     name = "diag_op_test",
     size = "medium",
     srcs = ["diag_op_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     shard_count = 2,
 )
 
@@ -101,7 +113,7 @@ py_tests(
     size = "small",
     srcs = ["reader_ops_test.py"],
     additional_deps = [
-        "//tensorflow/python:lib",
+        "//tensorflow:tensorflow_py",
     ],
 )
 
@@ -180,6 +192,9 @@ cuda_py_tests(
         "xent_op_test.py",
         "zero_division_test.py",
     ],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
 )
 
 cuda_py_tests(
@@ -204,6 +219,9 @@ cuda_py_tests(
         "sparse_ops_test.py",
         "sparse_tensor_dense_matmul_op_test.py",
     ],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
 )
 
 # TODO(gpapan): Revisit the gradient of extract_image_patches_op to resolve
@@ -212,6 +230,9 @@ cuda_py_test(
     name = "extract_image_patches_grad_test",
     size = "medium",
     srcs = ["extract_image_patches_grad_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     tags = ["notap"],  # http://b/31080670
 )
 
@@ -219,6 +240,9 @@ cuda_py_test(
     name = "concat_op_test",
     size = "medium",
     srcs = ["concat_op_test.py"],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     tags = ["notsan"],  # http://b/30445083
 )
 
@@ -233,6 +257,9 @@ cuda_py_tests(
         "linalg_grad_test.py",
         "svd_op_test.py",
     ],
+    additional_deps = [
+        "//tensorflow:tensorflow_py",
+    ],
     shard_count = 50,
     tags = ["notap"],  # b/30226163
 )
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 542af1a87e6..c022ee881af 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
 import time
 
 import numpy as np
@@ -717,5 +716,36 @@ class SliceAssignTest(test_util.TensorFlowTestCase):
         v = tf.Variable([1, 2])
         sess.run(v[:].assign([1, 2]))
 
+
+class ShapeSizeRankTest(test_util.TensorFlowTestCase):
+
+  def testDenseShape(self):
+    with self.test_session():
+      t_value = [[0, 42], [24, 0]]
+      self.assertAllEqual((2, 2), tf.shape(t_value).eval())
+      self.assertEqual(4, tf.size(t_value).eval())
+      self.assertEqual(2, tf.rank(t_value).eval())
+
+      t = tf.constant(t_value)
+      self.assertAllEqual((2, 2), tf.shape(t).eval())
+      self.assertEqual(4, tf.size(t).eval())
+      self.assertEqual(2, tf.rank(t).eval())
+
+  def testSparseShape(self):
+    with self.test_session():
+      sp_value = tf.SparseTensorValue(
+          indices=((0, 1), (1, 0)),
+          values=(42, 24),
+          shape=(2, 2))
+      self.assertAllEqual((2, 2), tf.shape(sp_value).eval())
+      self.assertEqual(4, tf.size(sp_value).eval())
+      self.assertEqual(2, tf.rank(sp_value).eval())
+
+      sp = tf.SparseTensor.from_value(sp_value)
+      self.assertAllEqual((2, 2), tf.shape(sp).eval())
+      self.assertEqual(4, tf.size(sp).eval())
+      self.assertEqual(2, tf.rank(sp).eval())
+
+
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index 8b966055e66..37f718b1224 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -24,6 +24,8 @@ import math
 import numpy as np
 import tensorflow as tf
 
+from tensorflow.python.ops import gen_math_ops
+
 
 _ADD = lambda x, y: x + y
 _SUB = lambda x, y: x - y
@@ -77,8 +79,7 @@ class UnaryOpTest(tf.test.TestCase):
       else:
         self.assertAllClose(np_ans, tf_cpu)
 
-      if (x.dtype in (np.complex64, np.complex128) and
-            tf_func in (tf.sign, tf.sqrt, tf.rsqrt, tf.log)):
+      if x.dtype in (np.complex64, np.complex128) and tf_func == tf.sign:
         return  # Return early
 
       if x.dtype == np.float16:
@@ -369,7 +370,7 @@ class UnaryOpTest(tf.test.TestCase):
     self._compareCpu(x, np.negative, _NEG)
     self._compareCpu(y, self._inv, tf.inv)
     self._compareCpu(x, np.square, tf.square)
-    self._compareCpu(x, np.sqrt, tf.sqrt)
+    self._compareCpu(y, np.sqrt, tf.sqrt)
     self._compareCpu(y, self._rsqrt, tf.rsqrt)
     self._compareCpu(x, np.exp, tf.exp)
     self._compareCpu(y, np.log, tf.log)
@@ -400,7 +401,7 @@ class UnaryOpTest(tf.test.TestCase):
     self._compareCpu(x, np.negative, _NEG)
     self._compareCpu(y, self._inv, tf.inv)
     self._compareCpu(x, np.square, tf.square)
-    self._compareCpu(x, np.sqrt, tf.sqrt)
+    self._compareCpu(y, np.sqrt, tf.sqrt)
     self._compareCpu(y, self._rsqrt, tf.rsqrt)
     self._compareCpu(x, np.exp, tf.exp)
     self._compareCpu(y, np.log, tf.log)
@@ -421,6 +422,36 @@ class UnaryOpTest(tf.test.TestCase):
     self._compareCpu(y, complex_sign, tf.sign)
     self._compareBothSparse(y, complex_sign, tf.sign)
 
+  def testGradGrad(self):
+    np.random.seed(7)
+    shape = (5,)
+    dtype_tols = [(np.float32, 1e-3), (np.float64, 1e-6), (np.complex64, 1e-3),
+                  (np.complex128, 1e-6)]
+    op_range = [(gen_math_ops._inv_grad, [-2, 2]),
+                (gen_math_ops._sigmoid_grad, [-2, 2]),
+                (gen_math_ops._sqrt_grad, [1, 3]),
+                (gen_math_ops._tanh_grad, [-2, 2]),]
+
+    def rand(dtype):
+      x = np.random.uniform(
+          real_range[0], real_range[1], size=shape[0]).astype(dtype)
+      if dtype in (np.complex64, np.complex128):
+        x += 1j * np.random.uniform(-2, 2, size=shape[0]).astype(dtype)
+      return x
+
+    for op, real_range in op_range:
+      with self.test_session():
+        for dtype, tol in dtype_tols:
+          x = tf.constant(rand(dtype))
+          y = tf.constant(rand(dtype))
+          z = op(x, y)
+          error = tf.test.compute_gradient_error(
+              [x, y], [shape, shape],
+              z,
+              shape,
+              x_init_value=[rand(dtype), rand(dtype)])
+          self.assertLess(error, tol)
+
 
 class BinaryOpTest(tf.test.TestCase):
 
@@ -453,6 +484,12 @@ class BinaryOpTest(tf.test.TestCase):
         self.assertAllClose(np_ans, np_var_right)
     self.assertShapeEqual(np_ans, out)
 
+  _GRAD_TOL = {tf.float16: 1e-3,
+               tf.float32: 1e-3,
+               tf.complex64: 1e-2,
+               tf.float64: 1e-5,
+               tf.complex128: 1e-4}
+
   def _compareGradientX(self, x, y, np_func, tf_func,
                         numeric_gradient_type=None):
     z = np_func(x, y)
@@ -483,12 +520,8 @@ class BinaryOpTest(tf.test.TestCase):
                                               x_init_value=xf,
                                               delta=1e-3)
         jacob_n = jacob_n.astype(x.dtype)
-      if x.dtype == np.float16:
-        self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
-      elif x.dtype == np.float32:
-        self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
-      elif x.dtype == np.float64:
-        self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
+      tol = self._GRAD_TOL[tf.as_dtype(x.dtype)]
+      self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
 
   def _compareGradientY(self, x, y, np_func, tf_func,
                         numeric_gradient_type=None):
@@ -519,12 +552,8 @@ class BinaryOpTest(tf.test.TestCase):
                                               zs,
                                               x_init_value=yf)
         jacob_n = jacob_n.astype(x.dtype)
-    if x.dtype == np.float16:
-      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
-    elif x.dtype == np.float32:
-      self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
-    elif x.dtype == np.float64:
-      self.assertAllClose(jacob_t, jacob_n, rtol=1e-5, atol=1e-5)
+    tol = self._GRAD_TOL[tf.as_dtype(x.dtype)]
+    self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol)
 
   def _compareGpu(self, x, y, np_func, tf_func):
     np_ans = np_func(x, y)
@@ -722,10 +751,15 @@ class BinaryOpTest(tf.test.TestCase):
     self._compareCpu(z, w, _ADD, _ADD)
 
   def _compareBCast(self, xs, ys, dtype, np_func, tf_func):
-    x = (1 + np.linspace(0, 5, np.prod(xs))).astype(dtype).reshape(xs)
-    y = (1 + np.linspace(0, 5, np.prod(ys))).astype(dtype).reshape(ys)
+    if dtype in (np.complex64, np.complex128):
+      x = (1 + np.linspace(0, 2 + 3j, np.prod(xs))).astype(dtype).reshape(xs)
+      y = (1 + np.linspace(0, 2 - 2j, np.prod(ys))).astype(dtype).reshape(ys)
+    else:
+      x = (1 + np.linspace(0, 5, np.prod(xs))).astype(dtype).reshape(xs)
+      y = (1 + np.linspace(0, 5, np.prod(ys))).astype(dtype).reshape(ys)
     self._compareCpu(x, y, np_func, tf_func)
-    if x.dtype in (np.float16, np.float32, np.float64):
+    if x.dtype in (np.float16, np.float32, np.float64, np.complex64,
+                   np.complex128):
       if tf_func not in (_FLOORDIV, tf.floordiv):
         if x.dtype == np.float16:
           # Compare fp16 theoretical gradients to fp32 numerical gradients,
@@ -1000,25 +1034,15 @@ class BinaryOpTest(tf.test.TestCase):
         error = tf.test.compute_gradient_error(y, [], z, [])
         self.assertEqual(error, 0)
 
-  def testComplexPowGradPositiveBase(self):
+  def testComplexPowGrad(self):
     with self.test_session():
       for dtype in np.complex64, np.complex128:
-        x = tf.constant(2.0, dtype=dtype)
-        y = tf.constant(2.0, dtype=dtype)
-        z = tf.pow(x, y)
-        error = tf.test.compute_gradient_error(y, [], z, [])
-        self.assertLess(error, 1e-4)
-
-  def testComplexPowGradNegativeBase(self):
-    with self.test_session() as session:
-      for dtype in np.complex64, np.complex128:
-        x = tf.constant(-2.0, dtype=dtype)
-        y = tf.constant(2.0, dtype=dtype)
-        z = tf.pow(x, y)
-        expected_x_grad = -4
-        expected_y_grad = (-2)**2 * (np.log(2) + np.pi * 1j)
-        self.assertAllClose([expected_x_grad, expected_y_grad],
-                            session.run(tf.gradients(z, [x, y])))
+        for base in 2.0, -2.0:
+          x = tf.constant(base, dtype=dtype)
+          y = tf.constant(2.0, dtype=dtype)
+          z = tf.pow(x, y)
+          error = tf.test.compute_gradient_error(y, [], z, [])
+          self.assertLess(error, 2e-4)
 
 
 class ComparisonOpTest(tf.test.TestCase):
@@ -1770,6 +1794,12 @@ class ComplexMakeRealImagTest(tf.test.TestCase):
     self._compareRealImag(cplx, use_gpu=False)
     self._compareRealImag(cplx, use_gpu=True)
 
+  def testRealReal(self):
+    for dtype in tf.int32, tf.int64, tf.float32, tf.float64:
+      x = tf.placeholder(dtype)
+      y = tf.real(x)
+      self.assertEqual(x, y)
+
   def _compareConj(self, cplx, use_gpu):
     np_ans = np.conj(cplx)
     with self.test_session(use_gpu=use_gpu):
@@ -1793,6 +1823,17 @@ class ComplexMakeRealImagTest(tf.test.TestCase):
     self._compareConj(cplx, use_gpu=False)
     self._compareConj(cplx, use_gpu=True)
 
+  def testConjReal(self):
+    for dtype in tf.int32, tf.int64, tf.float16, tf.float32, tf.float64:
+      x = tf.placeholder(dtype)
+      y = tf.conj(x)
+      self.assertEqual(x, y)
+
+  def testConjString(self):
+    x = tf.placeholder(tf.string)
+    with self.assertRaisesRegexp(TypeError, r"Expected numeric tensor"):
+      tf.conj(x)
+
   def _compareGradient(self, x):
     # x[:, 0] is real, x[:, 1] is imag.  We combine real and imag into
     # complex numbers. Then, we extract real and imag parts and
@@ -1822,14 +1863,14 @@ class ComplexMakeRealImagTest(tf.test.TestCase):
     epsilon = 1e-3
     with self.test_session():
       for args in [(x_, 0.), (0., x_)]:
-          z = tf.reduce_sum(tf.complex_abs(tf.complex(*args)))
-          jacob_t, jacob_n = tf.test.compute_gradient(x_,
-                                                      list(x.shape),
-                                                      z,
-                                                      [1],
-                                                      x_init_value=x,
-                                                      delta=epsilon)
-          self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
+        z = tf.reduce_sum(tf.complex_abs(tf.complex(*args)))
+        jacob_t, jacob_n = tf.test.compute_gradient(x_,
+                                                    list(x.shape),
+                                                    z,
+                                                    [1],
+                                                    x_init_value=x,
+                                                    delta=epsilon)
+        self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
 
   def testGradient(self):
     # complex64
diff --git a/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py b/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
new file mode 100644
index 00000000000..aaccd955141
--- /dev/null
+++ b/tensorflow/python/kernel_tests/draw_bounding_box_op_test.py
@@ -0,0 +1,102 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for draw_bounding_box_op."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+
+
+class DrawBoundingBoxOpTest(tf.test.TestCase):
+
+  def _fillBorder(self, image, color):
+    """Fill the border of the image.
+
+    Args:
+      image: Numpy array of shape [height, width, depth].
+      color: Numpy color of shape [depth], containing either RGB or RGBA values.
+
+    Returns:
+      image of original shape with border filled with "color".
+
+    Raises:
+      ValueError: Depths of image and color don't match.
+    """
+    height, width, depth = image.shape
+    if depth != color.shape[0]:
+      raise ValueError("Image (%d) and color (%d) depths must match." %
+                       (depth, color.shape[0]))
+    image[0:height, 0, 0:depth] = color
+    image[0:height, width - 1, 0:depth] = color
+    image[0, 0:width, 0:depth] = color
+    image[height - 1, 0:width, 0:depth] = color
+    return image
+
+  def _testDrawBoundingBoxColorCycling(self, img):
+    """Tests if cycling works appropriately.
+
+    Args:
+      img: 3-D numpy image on which to draw.
+    """
+    # THIS TABLE MUST MATCH draw_bounding_box_op.cc
+    color_table = np.asarray([[1, 1, 0, 1], [0, 0, 1, 1], [1, 0, 0, 1],
+                              [0, 1, 0, 1], [0.5, 0, 0.5, 1],
+                              [0.5, 0.5, 0, 1], [0.5, 0, 0, 1],
+                              [0, 0, 0.5, 1], [0, 1, 1, 1],
+                              [1, 0, 1, 1]])
+    assert len(img.shape) == 3
+    depth = img.shape[2]
+    assert depth <= color_table.shape[1]
+    assert depth == 1 or depth == 3 or depth == 4
+    ## Set red channel to 1 if image is GRY.
+    if depth == 1:
+      color_table[:, 0] = 1
+    num_colors = color_table.shape[0]
+    for num_boxes in range(1, num_colors + 2):
+      # Generate draw_bounding_box_op drawn image
+      image = np.copy(img)
+      color = color_table[(num_boxes - 1) % num_colors, 0:depth]
+      test_drawn_image = self._fillBorder(image, color)
+      bboxes = np.asarray([0, 0, 1, 1])
+      bboxes = np.vstack([bboxes for _ in range(num_boxes)])
+      bboxes = tf.to_float(bboxes)
+      bboxes = tf.expand_dims(bboxes, 0)
+      image = tf.convert_to_tensor(image)
+      image = tf.image.convert_image_dtype(image, tf.float32)
+      image = tf.expand_dims(image, 0)
+      image = tf.image.draw_bounding_boxes(image, bboxes)
+      with self.test_session(use_gpu=False) as sess:
+        op_drawn_image = np.squeeze(sess.run(image), 0)
+        self.assertAllEqual(test_drawn_image, op_drawn_image)
+
+  def testDrawBoundingBoxRGBColorCycling(self):
+    """Test if RGB color cycling works correctly."""
+    image = np.zeros([10, 10, 3], "float32")
+    self._testDrawBoundingBoxColorCycling(image)
+
+  def testDrawBoundingBoxRGBAColorCycling(self):
+    """Test if RGBA color cycling works correctly."""
+    image = np.zeros([10, 10, 4], "float32")
+    self._testDrawBoundingBoxColorCycling(image)
+
+  def testDrawBoundingBoxGRY(self):
+    """Test if drawing bounding box on a GRY image works."""
+    image = np.zeros([4, 4, 1], "float32")
+    self._testDrawBoundingBoxColorCycling(image)
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow/python/kernel_tests/edit_distance_op_test.py b/tensorflow/python/kernel_tests/edit_distance_op_test.py
index 6d5cf73fc55..4662b956cfe 100644
--- a/tensorflow/python/kernel_tests/edit_distance_op_test.py
+++ b/tensorflow/python/kernel_tests/edit_distance_op_test.py
@@ -31,26 +31,49 @@ def ConstantOf(x):
 
 class EditDistanceTest(tf.test.TestCase):
 
-  def _testEditDistance(self, hypothesis, truth, normalize,
-                        expected_output, expected_err_re=None):
-    # hypothesis and truth are (index, value, shape) tuples
-    hypothesis_st = tf.SparseTensor(*[ConstantOf(x) for x in hypothesis])
-    truth_st = tf.SparseTensor(*[ConstantOf(x) for x in truth])
+  def _testEditDistanceST(
+      self, hypothesis_st, truth_st, normalize, expected_output,
+      expected_shape, expected_err_re=None):
     edit_distance = tf.edit_distance(
         hypothesis=hypothesis_st, truth=truth_st, normalize=normalize)
 
-    with self.test_session():
-      if expected_err_re is None:
-        # Shape inference figures out the shape from the shape variables
-        # Explicit tuple() needed since zip returns an iterator in Python 3.
-        expected_shape = [
-            max(h, t) for h, t in tuple(zip(hypothesis[2], truth[2]))[:-1]]
-        self.assertEqual(edit_distance.get_shape(), expected_shape)
-        output = edit_distance.eval()
-        self.assertAllClose(output, expected_output)
-      else:
-        with self.assertRaisesOpError(expected_err_re):
-          edit_distance.eval()
+    if expected_err_re is None:
+      self.assertEqual(edit_distance.get_shape(), expected_shape)
+      output = edit_distance.eval()
+      self.assertAllClose(output, expected_output)
+    else:
+      with self.assertRaisesOpError(expected_err_re):
+        edit_distance.eval()
+
+  def _testEditDistance(self, hypothesis, truth, normalize,
+                        expected_output, expected_err_re=None):
+    # Shape inference figures out the shape from the shape variables
+    # Explicit tuple() needed since zip returns an iterator in Python 3.
+    expected_shape = [
+        max(h, t) for h, t in tuple(zip(hypothesis[2], truth[2]))[:-1]]
+
+    # SparseTensorValue inputs.
+    with tf.Graph().as_default() as g, self.test_session(g):
+      # hypothesis and truth are (index, value, shape) tuples
+      self._testEditDistanceST(
+          hypothesis_st=tf.SparseTensorValue(
+              *[ConstantOf(x) for x in hypothesis]),
+          truth_st=tf.SparseTensorValue(*[ConstantOf(x) for x in truth]),
+          normalize=normalize,
+          expected_output=expected_output,
+          expected_shape=expected_shape,
+          expected_err_re=expected_err_re)
+
+    # SparseTensor inputs.
+    with tf.Graph().as_default() as g, self.test_session(g):
+      # hypothesis and truth are (index, value, shape) tuples
+      self._testEditDistanceST(
+          hypothesis_st=tf.SparseTensor(*[ConstantOf(x) for x in hypothesis]),
+          truth_st=tf.SparseTensor(*[ConstantOf(x) for x in truth]),
+          normalize=normalize,
+          expected_output=expected_output,
+          expected_shape=expected_shape,
+          expected_err_re=expected_err_re)
 
   def testEditDistanceNormalized(self):
     hypothesis_indices = [[0, 0], [0, 1],
diff --git a/tensorflow/python/kernel_tests/random_gamma_test.py b/tensorflow/python/kernel_tests/random_gamma_test.py
index b824f559064..9231f0cc699 100644
--- a/tensorflow/python/kernel_tests/random_gamma_test.py
+++ b/tensorflow/python/kernel_tests/random_gamma_test.py
@@ -27,6 +27,10 @@ import tensorflow as tf
 class RandomGammaTest(tf.test.TestCase):
   """This is a medium test due to the moments computation taking some time."""
 
+  def setUp(self):
+    np.random.seed(137)
+    tf.set_random_seed(137)
+
   def _Sampler(self, num, alpha, beta, dtype, use_gpu, seed=None):
 
     def func():
@@ -39,85 +43,116 @@ class RandomGammaTest(tf.test.TestCase):
 
     return func
 
-  def testMoments(self):
+  def testMomentsFloat32(self):
+    self._testMoments(tf.float32)
+
+  def testMomentsFloat64(self):
+    self._testMoments(tf.float64)
+
+  def _testMoments(self, dt):
     try:
       from scipy import stats  # pylint: disable=g-import-not-at-top
-      z_limit = 6.0
-      for dt in tf.float16, tf.float32, tf.float64:
-        for stride in 0, 1, 4, 17:
-          for alpha in 0.2, 0.7, 3.0:
-            for scale in 9, 17:
-              # Gamma moments only defined for values less than the scale param.
-              max_moment = scale // 2
-              sampler = self._Sampler(
-                  1000, alpha, 1 / scale, dt, use_gpu=False, seed=137)
-              moments = [0] * (max_moment + 1)
-              moments_sample_count = [0] * (max_moment + 1)
-              x = np.array(sampler().flat)  # sampler does 10x samples
-              for k in range(len(x)):
-                moment = 1.
-                for i in range(max_moment + 1):
-                  index = k + i * stride
-                  if index >= len(x):
-                    break
-                  moments[i] += moment
-                  moments_sample_count[i] += 1
-                  moment *= x[index]
-              for i in range(max_moment + 1):
-                moments[i] /= moments_sample_count[i]
-              for i in range(1, max_moment + 1):
-                g = stats.gamma(alpha, scale=scale)
-                if stride == 0:
-                  moments_i_mean = g.moment(i)
-                  moments_i_squared = g.moment(2 * i)
-                else:
-                  moments_i_mean = pow(g.moment(1), i)
-                  moments_i_squared = pow(g.moment(2), i)
-                moments_i_var = (
-                    moments_i_squared - moments_i_mean * moments_i_mean)
-                # Assume every operation has a small numerical error.
-                # It takes i multiplications to calculate one i-th moment.
-                error_per_moment = i * 1e-6
-                total_variance = (
-                    moments_i_var / moments_sample_count[i] + error_per_moment)
-                if not total_variance:
-                  total_variance = 1e-10
-                # z_test is approximately a unit normal distribution.
-                z_test = abs(
-                    (moments[i] - moments_i_mean) / math.sqrt(total_variance))
-                self.assertLess(z_test, z_limit)
     except ImportError as e:
-      tf.logging.warn("Cannot test distribution moments: %s" % e)
+      tf.logging.warn("Cannot test moments: %s" % e)
+      return
+
+    # Check that the empirical moments of the samples match the theoretical
+    # gamma moments at different orders. The test is considered passing if the
+    # z-tests of all statistical moments are all below z_limit.
+    # Quantities used in the loops below:
+    #   max_moment: the largest moment order of the distribution to be tested
+    #   stride: the distance between samples to check for statistical properties
+    #       0 means the n-th moment of each sample
+    #       any other stride tests for spatial correlation between samples
+    #   z_limit: the largest z-test value for which the test still passes
+
+    # The moments test is a z-value test.  This is the largest z-value
+    # we want to tolerate. Since the z-test approximates a unit normal
+    # distribution, it should almost definitely never exceed 6.
+    z_limit = 6.0
+
+    for stride in 0, 1, 4, 17:
+      alphas = [0.2, 1.0, 3.0]
+      if dt == tf.float64:
+        alphas = [0.01] + alphas
+      for alpha in alphas:
+        for scale in 9, 17:
+          # Gamma moments only defined for values less than the scale param.
+          max_moment = min(6, scale // 2)
+          sampler = self._Sampler(
+              20000, alpha, 1 / scale, dt, use_gpu=False, seed=12345)
+          moments = [0] * (max_moment + 1)
+          moments_sample_count = [0] * (max_moment + 1)
+          x = np.array(sampler().flat)  # sampler does 10x samples
+          for k in range(len(x)):
+            moment = 1.
+            for i in range(max_moment + 1):
+              index = k + i * stride
+              if index >= len(x):
+                break
+              moments[i] += moment
+              moments_sample_count[i] += 1
+              moment *= x[index]
+          for i in range(max_moment + 1):
+            moments[i] /= moments_sample_count[i]
+          for i in range(1, max_moment + 1):
+            g = stats.gamma(alpha, scale=scale)
+            if stride == 0:
+              moments_i_mean = g.moment(i)
+              moments_i_squared = g.moment(2 * i)
+            else:
+              moments_i_mean = pow(g.moment(1), i)
+              moments_i_squared = pow(g.moment(2), i)
+            # Calculate moment variance safely:
+            # This is just
+            #  (moments_i_squared - moments_i_mean**2) / moments_sample_count[i]
+            normalized_moments_i_var = (
+                moments_i_mean / moments_sample_count[i] * (
+                    moments_i_squared/moments_i_mean - moments_i_mean))
+            # Assume every operation has a small numerical error.
+            # It takes i multiplications to calculate one i-th moment.
+            error_per_moment = i * np.finfo(dt.as_numpy_dtype).eps
+            total_variance = (
+                normalized_moments_i_var + error_per_moment)
+            tiny = np.finfo(dt.as_numpy_dtype).tiny
+            self.assertGreaterEqual(total_variance, 0)
+            if total_variance < tiny:
+              total_variance = tiny
+            # z_test is approximately a unit normal distribution.
+            z_test = abs(
+                (moments[i] - moments_i_mean) / math.sqrt(total_variance))
+            self.assertLess(z_test, z_limit)
 
   def _testZeroDensity(self, alpha):
     """Zero isn't in the support of the gamma distribution.
 
-    But quantized floating point math has its limits. # TODO(bjp):
-    Implement log-gamma sampler for small-shape distributions.
+    But quantized floating point math has its limits.
+    TODO(bjp): Implement log-gamma sampler for small-shape distributions.
 
     Args:
       alpha: float shape value to test
     """
     try:
       from scipy import stats  # pylint: disable=g-import-not-at-top
-      allowable_zeros = {
-          tf.float16: stats.gamma(alpha).cdf(np.finfo(np.float16).tiny),
-          tf.float32: stats.gamma(alpha).cdf(np.finfo(np.float32).tiny),
-          tf.float64: stats.gamma(alpha).cdf(np.finfo(np.float64).tiny)
-      }
-      failures = []
-      for use_gpu in [False, True]:
-        for dt in tf.float16, tf.float32, tf.float64:
-          sampler = self._Sampler(1000, alpha, 1.0, dt, use_gpu=use_gpu)
-          x = sampler()
-          allowable = allowable_zeros[dt] * x.size
-          allowable = allowable * 2 if allowable < 10 else allowable * 1.05
-          if np.sum(x <= 0) > allowable:
-            failures += [(use_gpu, dt)]
-      self.assertEqual([], failures)
-
     except ImportError as e:
-      tf.logging.warn("Cannot test using gamma cdf: %s" % e)
+      tf.logging.warn("Cannot test zero density proportions: %s" % e)
+      return
+    allowable_zeros = {
+        tf.float16: stats.gamma(alpha).cdf(np.finfo(np.float16).tiny),
+        tf.float32: stats.gamma(alpha).cdf(np.finfo(np.float32).tiny),
+        tf.float64: stats.gamma(alpha).cdf(np.finfo(np.float64).tiny)
+    }
+    failures = []
+    for use_gpu in [False, True]:
+      for dt in tf.float16, tf.float32, tf.float64:
+        sampler = self._Sampler(
+            10000, alpha, 1.0, dt, use_gpu=use_gpu, seed=12345)
+        x = sampler()
+        allowable = allowable_zeros[dt] * x.size
+        allowable = allowable * 2 if allowable < 10 else allowable * 1.05
+        if np.sum(x <= 0) > allowable:
+          failures += [(use_gpu, dt)]
+      self.assertEqual([], failures)
 
   def testNonZeroSmallShape(self):
     self._testZeroDensity(0.01)
diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py
index 7a2e3b96b65..7e250e1004b 100644
--- a/tensorflow/python/kernel_tests/reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/reduction_ops_test.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Functional tests for reduction ops."""
 from __future__ import absolute_import
 from __future__ import division
@@ -65,7 +64,11 @@ class ReducedShapeTest(tf.test.TestCase):
 
 class SumReductionTest(tf.test.TestCase):
 
-  def _compare(self, x, reduction_axes, keep_dims, use_gpu=False,
+  def _compare(self,
+               x,
+               reduction_axes,
+               keep_dims,
+               use_gpu=False,
                feed_dict=None):
     np_ans = x
     if reduction_axes is None:
@@ -89,6 +92,13 @@ class SumReductionTest(tf.test.TestCase):
     self._compare(x, reduction_axes, True, use_gpu=True, feed_dict=feed_dict)
     self._compare(x, reduction_axes, True, use_gpu=False, feed_dict=feed_dict)
 
+  def testInfinity(self):
+    for dtype in [np.float32, np.float64]:
+      for special_value_x in [-np.inf, np.inf]:
+        for special_value_y in [-np.inf, np.inf]:
+          np_arr = np.array([special_value_x, special_value_y]).astype(dtype)
+          self._compareAll(np_arr, None)
+
   def testFloatReduce1D(self):
     # Create a 1D array of floats
     np_arr = np.arange(1, 6).reshape([5]).astype(np.float32)
@@ -216,12 +226,12 @@ class SumReductionTest(tf.test.TestCase):
     # Reduction indices are unknown.
     unknown_indices = tf.placeholder(tf.int32)
     c_unknown_indices = tf.constant([[10.0], [20.0]])
-    s_unknown_indices = tf.reduce_sum(c_unknown_indices, unknown_indices,
-                                     keep_dims=False)
+    s_unknown_indices = tf.reduce_sum(
+        c_unknown_indices, unknown_indices, keep_dims=False)
     self.assertEqual(tensor_shape.unknown_shape(),
                      s_unknown_indices.get_shape())
-    s_unknown_indices_keep = tf.reduce_sum(c_unknown_indices, unknown_indices,
-                                          keep_dims=True)
+    s_unknown_indices_keep = tf.reduce_sum(
+        c_unknown_indices, unknown_indices, keep_dims=True)
     self.assertEqual(2, s_unknown_indices_keep.get_shape().ndims)
 
   # Int64??
@@ -234,12 +244,8 @@ class SumReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_sum(t, reduction_axes)
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  shape,
-                                                  su,
-                                                  sum_shape,
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, shape, su, sum_shape, x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient(self):
@@ -336,6 +342,13 @@ class MeanReductionTest(tf.test.TestCase):
     self._compareAll(np_arr, [0, 2])
     self._compareAll(np_arr, [0, 1, 2])
 
+  def testInfinity(self):
+    for dtype in [np.float32, np.float64]:
+      for special_value_x in [-np.inf, np.inf]:
+        for special_value_y in [-np.inf, np.inf]:
+          np_arr = np.array([special_value_x, special_value_y]).astype(dtype)
+          self._compareAll(np_arr, None)
+
   def testDoubleReduce3D(self):
     # Create a 3D array of doubles and reduce across all possible
     # dimensions
@@ -356,30 +369,18 @@ class MeanReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_mean(t, [1, 2])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 2], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
       su = tf.reduce_mean(t, [0, 1, 2, 3])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [1],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [1], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
       su = tf.reduce_mean(t, [])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 3, 4, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 3, 4, 2], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
       su = tf.reduce_mean(t, 0)
@@ -430,6 +431,13 @@ class ProdReductionTest(tf.test.TestCase):
     self._compare(x, reduction_axes, False)
     self._compare(x, reduction_axes, True)
 
+  def testInfinity(self):
+    for dtype in [np.float32, np.float64]:
+      for special_value_x in [-np.inf, np.inf]:
+        for special_value_y in [-np.inf, np.inf]:
+          np_arr = np.array([special_value_x, special_value_y]).astype(dtype)
+          self._compareAll(np_arr, None)
+
   def testFloatReduce3D(self):
     # Create a 3D array of floats and reduce across all possible
     # dimensions
@@ -449,30 +457,18 @@ class ProdReductionTest(tf.test.TestCase):
       t = tf.convert_to_tensor(x)
 
       su = tf.reduce_prod(t, [])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  x.shape,
-                                                  su,
-                                                  [2, 3, 4, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, x.shape, su, [2, 3, 4, 2], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
       su = tf.reduce_prod(t, [1, 2])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  x.shape,
-                                                  su,
-                                                  [2, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, x.shape, su, [2, 2], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
       su = tf.reduce_prod(t, [0, 1, 2, 3])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  x.shape,
-                                                  su,
-                                                  [1],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, x.shape, su, [1], x_init_value=x, delta=1)
       self.assertAllClose(jacob_t, jacob_n, rtol=1e-3, atol=1e-3)
 
       su = tf.reduce_prod(t, 0)
@@ -491,19 +487,19 @@ class ProdReductionTest(tf.test.TestCase):
     self._compareGradient(x)
     # Zero at beginning
     x1 = x.copy()
-    x1[:,:,0,:] = 0
+    x1[:, :, 0, :] = 0
     self._compareGradient(x1)
     # Zero at end
     x2 = x.copy()
-    x2[:,:,-1,:] = 0
+    x2[:, :, -1, :] = 0
     self._compareGradient(x2)
     # Zero in middle
     x3 = x.copy()
-    x3[:,:,2,:] = 0
+    x3[:, :, 2, :] = 0
     self._compareGradient(x3)
     # All zeros
     x4 = x.copy()
-    x4[:,:,:,:] = 0
+    x4[:, :, :, :] = 0
     self._compareGradient(x4)
 
   def testEmptyGradients(self):
@@ -546,6 +542,13 @@ class MinReductionTest(tf.test.TestCase):
     self._compare(x, reduction_axes, True, use_gpu=True)
     self._compare(x, reduction_axes, True, use_gpu=False)
 
+  def testInfinity(self):
+    for dtype in [np.float32, np.float64]:
+      for special_value_x in [-np.inf, np.inf]:
+        for special_value_y in [-np.inf, np.inf]:
+          np_arr = np.array([special_value_x, special_value_y]).astype(dtype)
+          self._compareAll(np_arr, None)
+
   def testFloatReduce3D(self):
     # Create a 3D array of floats and reduce across all possible
     # dimensions
@@ -580,12 +583,8 @@ class MinReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_min(t, [1, 2])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 2], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient2(self):
@@ -594,12 +593,8 @@ class MinReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_min(t, [1])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 4, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 4, 2], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient3(self):
@@ -608,12 +603,8 @@ class MinReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_min(t, [2])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 3, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 3, 2], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient4(self):
@@ -622,12 +613,8 @@ class MinReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_min(t)
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [1],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [1], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testEmptyGradients(self):
@@ -661,6 +648,13 @@ class MaxReductionTest(tf.test.TestCase):
     self._compare(x, reduction_axes, True, use_gpu=True)
     self._compare(x, reduction_axes, True, use_gpu=False)
 
+  def testInfinity(self):
+    for dtype in [np.float32, np.float64]:
+      for special_value_x in [-np.inf, np.inf]:
+        for special_value_y in [-np.inf, np.inf]:
+          np_arr = np.array([special_value_x, special_value_y]).astype(dtype)
+          self._compareAll(np_arr, None)
+
   def testFloatReduce3D(self):
     # Create a 3D array of floats and reduce across all possible
     # dimensions
@@ -695,12 +689,8 @@ class MaxReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_max(t, [1, 2])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 2], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient2(self):
@@ -709,12 +699,8 @@ class MaxReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_max(t, [1])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 4, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 4, 2], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient3(self):
@@ -723,12 +709,8 @@ class MaxReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_max(t, [2])
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [2, 3, 2],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [2, 3, 2], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testGradient4(self):
@@ -737,12 +719,8 @@ class MaxReductionTest(tf.test.TestCase):
     with self.test_session():
       t = tf.convert_to_tensor(x)
       su = tf.reduce_max(t)
-      jacob_t, jacob_n = tf.test.compute_gradient(t,
-                                                  s,
-                                                  su,
-                                                  [1],
-                                                  x_init_value=x,
-                                                  delta=1)
+      jacob_t, jacob_n = tf.test.compute_gradient(
+          t, s, su, [1], x_init_value=x, delta=1)
     self.assertAllClose(jacob_t, jacob_n, rtol=1e-8, atol=1e-8)
 
   def testEmptyGradients(self):
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 89ed687cebf..07ce4e193b1 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -1631,8 +1631,14 @@ class RawRNNTest(tf.test.TestCase):
       inputs_ta = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0])
       inputs_ta = inputs_ta.unpack(inputs)
 
-      def loop_fn(time_, cell_output, unused_loop_state):
+      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
+      def loop_fn(time_, cell_output, cell_state, unused_loop_state):
         emit_output = cell_output  # == None for time == 0
+        if cell_output is None:  # time == 0
+          next_state = cell.zero_state(batch_size, tf.float32)
+        else:
+          next_state = cell_state  # copy state through
         elements_finished = (time_ >= sequence_length)
         finished = tf.reduce_all(elements_finished)
         # For the very final iteration, we must emit a dummy input
@@ -1640,16 +1646,14 @@ class RawRNNTest(tf.test.TestCase):
             finished,
             lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
             lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, emit_output, None)
+        return (elements_finished, next_input, next_state, emit_output, None)
 
-      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-      initial_state = cell.zero_state(batch_size, tf.float32)
-      outputs_ta, final_state, _ = tf.nn.raw_rnn(cell, loop_fn, initial_state)
+      outputs_ta, final_state, _ = tf.nn.raw_rnn(cell, loop_fn)
       outputs = outputs_ta.pack()
 
       tf.get_variable_scope().reuse_variables()
       outputs_dynamic_rnn, final_state_dynamic_rnn = tf.nn.dynamic_rnn(
-          cell, inputs, time_major=True, initial_state=initial_state,
+          cell, inputs, time_major=True, dtype=tf.float32,
           sequence_length=sequence_length)
 
       variables = tf.trainable_variables()
@@ -1717,11 +1721,15 @@ class RawRNNTest(tf.test.TestCase):
       inputs_ta = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0])
       inputs_ta = inputs_ta.unpack(inputs)
 
-      def loop_fn(time_, cell_output, loop_state):
+      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+
+      def loop_fn(time_, cell_output, cell_state, loop_state):
         if cell_output is None:
           loop_state = tf.constant([0])
+          next_state = cell.zero_state(batch_size, tf.float32)
         else:
           loop_state = tf.pack([tf.squeeze(loop_state) + 1])
+          next_state = cell_state
         emit_output = cell_output  # == None for time == 0
         elements_finished = tf.tile([time_ >= max_time], [batch_size])
         finished = tf.reduce_all(elements_finished)
@@ -1730,11 +1738,10 @@ class RawRNNTest(tf.test.TestCase):
             finished,
             lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
             lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, emit_output, loop_state)
+        return (elements_finished, next_input,
+                next_state, emit_output, loop_state)
 
-      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-      initial_state = cell.zero_state(batch_size, tf.float32)
-      r = tf.nn.raw_rnn(cell, loop_fn, initial_state)
+      r = tf.nn.raw_rnn(cell, loop_fn)
       loop_state = r[-1]
       self.assertEqual([10], loop_state.eval())
 
@@ -1749,14 +1756,17 @@ class RawRNNTest(tf.test.TestCase):
       inputs_ta = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0])
       inputs_ta = inputs_ta.unpack(inputs)
 
-      def loop_fn(time_, cell_output, loop_state):
+      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+      def loop_fn(time_, cell_output, cell_state, loop_state):
         if cell_output is None:
           loop_state = tf.TensorArray(
               dynamic_size=True, size=0, dtype=tf.int32, clear_after_read=False)
           loop_state = loop_state.write(0, 1)
+          next_state = cell.zero_state(batch_size, tf.float32)
         else:
           loop_state = loop_state.write(
               time_, loop_state.read(time_ - 1) + time_)
+          next_state = cell_state
         emit_output = cell_output  # == None for time == 0
         elements_finished = tf.tile([time_ >= max_time], [batch_size])
         finished = tf.reduce_all(elements_finished)
@@ -1765,11 +1775,10 @@ class RawRNNTest(tf.test.TestCase):
             finished,
             lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
             lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, emit_output, loop_state)
+        return (elements_finished, next_input,
+                next_state, emit_output, loop_state)
 
-      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-      initial_state = cell.zero_state(batch_size, tf.float32)
-      r = tf.nn.raw_rnn(cell, loop_fn, initial_state)
+      r = tf.nn.raw_rnn(cell, loop_fn)
       loop_state = r[-1]
       loop_state = loop_state.pack()
       self.assertAllEqual([1, 2, 2 + 2, 4 + 3, 7 + 4], loop_state.eval())
@@ -1785,14 +1794,16 @@ class RawRNNTest(tf.test.TestCase):
       inputs_ta = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0])
       inputs_ta = inputs_ta.unpack(inputs)
 
-      def loop_fn(time_, cell_output, _):
+      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+      def loop_fn(time_, cell_output, cell_state, _):
         if cell_output is None:
           emit_output = (tf.zeros([2, 3], dtype=tf.int32),
                          tf.zeros([1], dtype=tf.int64))
+          next_state = cell.zero_state(batch_size, tf.float32)
         else:
           emit_output = (tf.ones([batch_size, 2, 3], dtype=tf.int32),
                          tf.ones([batch_size, 1], dtype=tf.int64))
-
+          next_state = cell_state
         elements_finished = tf.tile([time_ >= max_time], [batch_size])
         finished = tf.reduce_all(elements_finished)
         # For the very final iteration, we must emit a dummy input
@@ -1800,11 +1811,9 @@ class RawRNNTest(tf.test.TestCase):
             finished,
             lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
             lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, emit_output, None)
+        return (elements_finished, next_input, next_state, emit_output, None)
 
-      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-      initial_state = cell.zero_state(batch_size, tf.float32)
-      r = tf.nn.raw_rnn(cell, loop_fn, initial_state)
+      r = tf.nn.raw_rnn(cell, loop_fn)
       output_ta = r[0]
       self.assertEqual(2, len(output_ta))
       self.assertEqual([tf.int32, tf.int64], [ta.dtype for ta in output_ta])
@@ -1848,8 +1857,14 @@ class RawRNNTest(tf.test.TestCase):
       inputs_ta = tf.TensorArray(dtype=tf.float32, size=tf.shape(inputs)[0])
       inputs_ta = inputs_ta.unpack(inputs)
 
-      def loop_fn(time_, cell_output, unused_loop_state):
+      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
+      def loop_fn(time_, cell_output, cell_state, unused_loop_state):
         emit_output = cell_output  # == None for time == 0
+        if cell_output is None:  # time == 0
+          next_state = cell.zero_state(batch_size, tf.float32)
+        else:
+          next_state = cell_state
+
         elements_finished = (time_ >= sequence_length)
         finished = tf.reduce_all(elements_finished)
         # For the very final iteration, we must emit a dummy input
@@ -1857,11 +1872,9 @@ class RawRNNTest(tf.test.TestCase):
             finished,
             lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
             lambda: inputs_ta.read(time_))
-        return (elements_finished, next_input, emit_output, None)
+        return (elements_finished, next_input, next_state, emit_output, None)
 
-      cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-      initial_state = cell.zero_state(batch_size, tf.float32)
-      return tf.nn.raw_rnn(cell, loop_fn, initial_state, scope=scope)
+      return tf.nn.raw_rnn(cell, loop_fn, scope=scope)
 
     self._testScope(factory, use_outer_scope=True)
     self._testScope(factory, use_outer_scope=False)
diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
index 512bd79433f..358a73f25c5 100644
--- a/tensorflow/python/kernel_tests/svd_op_test.py
+++ b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -26,13 +26,16 @@ class SvdOpTest(tf.test.TestCase):
   def testWrongDimensions(self):
     # The input to svd should be 2-dimensional tensor.
     scalar = tf.constant(1.)
-    with self.assertRaises(ValueError):
+    with self.assertRaisesRegexp(ValueError,
+                                 "Shape must be rank 2 but is rank 0"):
       tf.svd(scalar)
     vector = tf.constant([1., 2.])
-    with self.assertRaises(ValueError):
+    with self.assertRaisesRegexp(ValueError,
+                                 "Shape must be rank 2 but is rank 1"):
       tf.svd(vector)
     tensor = tf.constant([[[1., 2.], [3., 4.]], [[1., 2.], [3., 4.]]])
-    with self.assertRaises(ValueError):
+    with self.assertRaisesRegexp(ValueError,
+                                 "Shape must be rank 2 but is rank 3"):
       tf.svd(tensor)
     scalar = tf.constant(1. + 1.0j)
     with self.assertRaises(ValueError):
@@ -47,10 +50,12 @@ class SvdOpTest(tf.test.TestCase):
 
     # The input to batch_svd should be a tensor of at least rank 2.
     scalar = tf.constant(1.)
-    with self.assertRaises(ValueError):
+    with self.assertRaisesRegexp(ValueError,
+                                 "Shape must be at least rank 2 but is rank 0"):
       tf.batch_svd(scalar)
     vector = tf.constant([1., 2.])
-    with self.assertRaises(ValueError):
+    with self.assertRaisesRegexp(ValueError,
+                                 "Shape must be at least rank 2 but is rank 1"):
       tf.batch_svd(vector)
     scalar = tf.constant(1. + 1.0j)
     with self.assertRaises(ValueError):
@@ -62,8 +67,11 @@ class SvdOpTest(tf.test.TestCase):
 
 def _GetSvdOpTest(dtype_, shape_):
 
+  is_complex = dtype_ in (np.complex64, np.complex128)
+  is_single = dtype_ in (np.float32, np.complex64)
+
   def CompareSingularValues(self, x, y):
-    if dtype_ in (np.float32, np.complex64):
+    if is_single:
       tol = 5e-5
     else:
       tol = 1e-14
@@ -73,10 +81,10 @@ def _GetSvdOpTest(dtype_, shape_):
                         atol=(np.imag(x)[0] + np.imag(y)[0]) * tol)
 
   def CompareSingularVectors(self, x, y, rank):
-    if dtype_ in (np.float32, np.complex64):
+    if is_single:
       atol = 5e-4
     else:
-      atol = 1e-14
+      atol = 5e-14
     # We only compare the first 'rank' singular vectors since the
     # remainder form an arbitrary orthonormal basis for the
     # (row- or column-) null space, whose exact value depends on
@@ -128,7 +136,7 @@ def _GetSvdOpTest(dtype_, shape_):
     # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
     xx = tf.batch_matmul(x, x, adj_x=True)
     identity = tf.batch_matrix_band_part(tf.ones_like(xx), 0, 0)
-    if dtype_ in (np.float32, np.complex64):
+    if is_single:
       tol = 1e-5
     else:
       tol = 1e-14
@@ -139,6 +147,7 @@ def _GetSvdOpTest(dtype_, shape_):
 
   def Test(self):
     np.random.seed(1)
+
     if dtype_ in (np.float32, np.float64):
       x = np.random.uniform(low=-1.0, high=1.0,
                             size=np.prod(shape_)).reshape(shape_).astype(dtype_)
@@ -152,6 +161,7 @@ def _GetSvdOpTest(dtype_, shape_):
                         size=np.prod(shape_)).reshape(shape_).astype(np.float64)
       + 1j * np.random.uniform(low=-1.0, high=1.0,
                         size=np.prod(shape_)).reshape(shape_).astype(np.float64)
+
     for compute_uv in False, True:
       for full_matrices in False, True:
         with self.test_session():
@@ -186,8 +196,8 @@ def _GetSvdOpTest(dtype_, shape_):
           CompareSingularValues(self, np_s, tf_s.eval())
           if compute_uv:
             CompareSingularVectors(self, np_u, tf_u.eval(), min(shape_[-2:]))
-            CompareSingularVectors(self, np.swapaxes(np_v, -2, -1), tf_v.eval(),
-                                   min(shape_[-2:]))
+            CompareSingularVectors(self, np.conj(np.swapaxes(np_v, -2, -1)),
+                                   tf_v.eval(), min(shape_[-2:]))
             CheckApproximation(self, x, tf_u, tf_s, tf_v, full_matrices)
             CheckUnitary(self, tf_u)
             CheckUnitary(self, tf_v)
@@ -195,12 +205,12 @@ def _GetSvdOpTest(dtype_, shape_):
   return Test
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
   for dtype in np.float32, np.float64, np.complex64, np.complex128:
     for rows in 1, 2, 5, 10, 32, 100:
       for cols in 1, 2, 5, 10, 32, 100:
         for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
           shape = batch_dims + (rows, cols)
-          name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape)))
-          setattr(SvdOpTest, 'testSvd_' + name, _GetSvdOpTest(dtype, shape))
+          name = "%s_%s" % (dtype.__name__, "_".join(map(str, shape)))
+          setattr(SvdOpTest, "testSvd_" + name, _GetSvdOpTest(dtype, shape))
   tf.test.main()
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
index 1cf54de465e..7e3fb6edb55 100644
--- a/tensorflow/python/ops/array_grad.py
+++ b/tensorflow/python/ops/array_grad.py
@@ -379,9 +379,6 @@ def _TileGrad(op, grad):
   return [input_grad, None]
 
 
-ops.NoGradient("TileGrad")
-
-
 ops.NoGradient("BroadcastGradientArgs")
 
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 85f1a91d353..cf01f2a287e 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -104,7 +104,9 @@ _baseslice = slice
 # Aliases for some automatically-generated names.
 listdiff = gen_array_ops.list_diff
 
+
 def shape(input, name=None):
+  # pylint: disable=redefined-builtin
   """Returns the shape of a tensor.
 
   This operation returns a 1-D integer tensor representing the shape of `input`.
@@ -127,6 +129,7 @@ def shape(input, name=None):
 
 
 def shape_internal(input, name=None, optimize=True):
+  # pylint: disable=redefined-builtin
   """Returns the shape of a tensor.
 
   Args:
@@ -138,7 +141,7 @@ def shape_internal(input, name=None, optimize=True):
     A `Tensor` of type `int32`.
   """
   with ops.name_scope(name, "Shape", [input]) as name:
-    if isinstance(input, ops.SparseTensor):
+    if isinstance(input, (ops.SparseTensor, ops.SparseTensorValue)):
       return gen_math_ops.cast(input.shape, dtypes.int32)
     else:
       input_tensor = ops.convert_to_tensor(input)
@@ -149,6 +152,7 @@ def shape_internal(input, name=None, optimize=True):
 
 
 def size(input, name=None):
+  # pylint: disable=redefined-builtin
   """Returns the size of a tensor.
 
   This operation returns an integer representing the number of elements in
@@ -172,6 +176,7 @@ def size(input, name=None):
 
 
 def size_internal(input, name=None, optimize=True):
+  # pylint: disable=redefined-builtin,protected-access
   """Returns the size of a tensor.
 
   Args:
@@ -183,7 +188,7 @@ def size_internal(input, name=None, optimize=True):
     A `Tensor` of type `int32`.
   """
   with ops.name_scope(name, "Size", [input]) as name:
-    if isinstance(input, ops.SparseTensor):
+    if isinstance(input, (ops.SparseTensor, ops.SparseTensorValue)):
       return gen_math_ops._prod(gen_math_ops.cast(input.shape, dtypes.int32), 0,
                                 name=name)
     else:
@@ -195,6 +200,7 @@ def size_internal(input, name=None, optimize=True):
 
 
 def rank(input, name=None):
+  # pylint: disable=redefined-builtin
   """Returns the rank of a tensor.
 
   This operation returns an integer representing the rank of `input`.
@@ -222,6 +228,7 @@ def rank(input, name=None):
 
 
 def rank_internal(input, name=None, optimize=True):
+  # pylint: disable=redefined-builtin
   """Returns the rank of a tensor.
 
   Args:
@@ -233,7 +240,7 @@ def rank_internal(input, name=None, optimize=True):
     A `Tensor` of type `int32`.
   """
   with ops.name_scope(name, "Rank", [input]) as name:
-    if isinstance(input, ops.SparseTensor):
+    if isinstance(input, (ops.SparseTensor, ops.SparseTensorValue)):
       return gen_array_ops.size(input.shape, name=name)
     else:
       input_tensor = ops.convert_to_tensor(input)
@@ -341,6 +348,7 @@ def _SliceHelper(tensor, slice_spec, var=None):
 
 # pylint: disable=undefined-variable,protected-access
 def slice(input_, begin, size, name=None):
+  # pylint: disable=redefined-builtin
   """Extracts a slice from a tensor.
 
   This operation extracts a slice of size `size` from a tensor `input` starting
@@ -784,24 +792,10 @@ def concat(concat_dim, values, name="concat"):
                                name=name)
 
 
-@ops.RegisterShape("Pack")
-def _PackShape(op):
-  input_shape = op.inputs[0].get_shape()
-  if input_shape.ndims is None:
-    return [tensor_shape.unknown_shape()]
-
-  for inp in op.inputs[1:]:
-    input_shape = input_shape.merge_with(inp.get_shape())
-
-  input_shape = input_shape.as_list()
-  axis = op.get_attr("axis")
-  if axis < 0: axis += len(input_shape) + 1
-  input_shape.insert(axis, len(op.inputs))
-  return [tensor_shape.TensorShape(input_shape)]
-
-
+ops.RegisterShape("Pack")(common_shapes.call_cpp_shape_fn)
 ops.RegisterShape("Unpack")(common_shapes.call_cpp_shape_fn)
 
+
 @ops.RegisterShape("Concat")
 def _ConcatShape(op):
   concat_dim = tensor_util.constant_value(op.inputs[0])
@@ -1603,6 +1597,7 @@ def _StridedSliceGradShape(op):
 
 ops.RegisterShape("StridedSliceAssign")(common_shapes.unchanged_shape)
 
+
 @ops.RegisterShape("StridedSlice")
 def _StridedSliceShape(op):
   """Shape function for array_ops.slice."""
@@ -1699,7 +1694,14 @@ def _StridedSliceShape(op):
   return [tensor_shape.TensorShape(final_shape)]
 
 
-ops.RegisterShape("Gather")(common_shapes.call_cpp_shape_fn)
+@ops.RegisterShape("Gather")
+def _GatherShape(op):
+  """Shape function for array_ops.gather."""
+  params_shape = op.inputs[0].get_shape()
+  indices_shape = op.inputs[1].get_shape()
+  return [indices_shape.concatenate(params_shape[1:])]
+
+
 ops.RegisterShape("GatherNd")(common_shapes.call_cpp_shape_fn)
 ops.RegisterShape("Unique")(common_shapes.call_cpp_shape_fn)
 ops.RegisterShape("UniqueWithCounts")(common_shapes.call_cpp_shape_fn)
@@ -2057,10 +2059,10 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"):
   Raises:
     TypeError: If either `hypothesis` or `truth` are not a `SparseTensor`.
   """
-  if not isinstance(hypothesis, ops.SparseTensor):
-    raise TypeError("Hypothesis must be a SparseTensor")
-  if not isinstance(truth, ops.SparseTensor):
-    raise TypeError("Truth must be a SparseTensor")
+  if not isinstance(hypothesis, (ops.SparseTensor, ops.SparseTensorValue)):
+    raise TypeError("Hypothesis must be a SparseTensor.")
+  if not isinstance(truth, (ops.SparseTensor, ops.SparseTensorValue)):
+    raise TypeError("Truth must be a SparseTensor.")
 
   return gen_array_ops._edit_distance(hypothesis.indices,
                                       hypothesis.values,
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index cf708f1a8a6..bd5d9b1b447 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -900,6 +900,7 @@ class GradLoopState(object):
         branch = (1 - cond_ctxt.branch) if dead_branch else cond_ctxt.branch
         history_value = _SwitchRefOrTensor(history_value, pred)[branch]
       pop = gen_data_flow_ops._stack_pop(history_value, value.dtype.base_dtype)
+      pop.set_shape(value.get_shape())
       self.grad_context.Exit()
     parallel_iterations = self.grad_context.parallel_iterations
     if parallel_iterations > 1:
@@ -1412,6 +1413,9 @@ class CondContext(ControlFlowContext):
       self.GetWhileContext().back_prop
     return False
 
+  def GetControlPivot(self):
+    return self._pivot
+
   def AddValue(self, val):
     """Add `val` to the current context and its outer context recursively."""
     if val.name in self._values:
@@ -1984,8 +1988,7 @@ class WhileContext(ControlFlowContext):
         if self.outer_context: self.outer_context.Exit()
       else:
         shape_acc = array_ops.zeros_like(
-            array_ops.shape_internal(
-                op.inputs[0], optimize=False),
+            array_ops.shape_internal(op.inputs[0], optimize=False),
             optimize=False)
 
     if self.outer_context: self.outer_context.Exit()
@@ -2056,6 +2059,13 @@ class WhileContext(ControlFlowContext):
                            parallel_iterations=self._parallel_iterations,
                            use_input_shape=(shape_invariants is None))
                     for x in real_vars]
+    if self._outer_context:
+      control_pivot = self._outer_context.GetControlPivot().op
+      for var in enter_vars:
+        if _IsLoopConstantEnter(var.op.inputs[0].op):
+          # pylint: disable=protected-access
+          var.op._add_control_input(control_pivot)
+          # pylint: enable=protected-access
     _SetShapeInvariants(real_vars, enter_vars, shape_invariants)
 
     # Fix the control inputs and control flow context of these enter ops.
diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py
index 4a7c9d7d8ce..400ec0277a7 100644
--- a/tensorflow/python/ops/gradients.py
+++ b/tensorflow/python/ops/gradients.py
@@ -235,9 +235,10 @@ def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
   return grad_ys
 
 
-def _IsFloat(tensor):
+def _IsTrainable(tensor):
   dtype = dtypes.as_dtype(tensor.dtype)
-  return dtype.base_dtype in (dtypes.float32, dtypes.float64)
+  return dtype.base_dtype in (dtypes.float16, dtypes.float32, dtypes.float64,
+                              dtypes.complex64, dtypes.complex128)
 
 
 def _VerifyGeneratedGradients(grads, op):
@@ -409,7 +410,7 @@ def gradients(ys,
     if loop_state:
       loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
       for y in loop_exits:
-        if _IsFloat(y):
+        if _IsTrainable(y):
           _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
           queue.append(y.op)
 
@@ -451,7 +452,7 @@ def gradients(ys,
           # therefore dC/doutput[i] is 0.
           for i, out_grad in enumerate(out_grads):
             if (not isinstance(out_grad, ops.Tensor)
-                and not out_grad) and _IsFloat(op.outputs[i]):
+                and not out_grad) and _IsTrainable(op.outputs[i]):
               # Only floating-point outputs get a zero gradient. Gradient
               # functions should ignore the gradient for other outputs.
               if loop_state:
@@ -541,7 +542,7 @@ def _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state):
             # For an unused exit, if it has floating-point outputs, backprop
             # a zero gradient. Otherwise, just ignore it.
             for y in grad_state.unused_exits:
-              if _IsFloat(y):
+              if _IsTrainable(y):
                 _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
               queue.append(y.op)
           else:
diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt
index e243720cabf..10816e092c9 100644
--- a/tensorflow/python/ops/hidden_ops.txt
+++ b/tensorflow/python/ops/hidden_ops.txt
@@ -121,7 +121,14 @@ TFRecordReader
 WholeFileReader
 
 # linalg_ops
-# (None)
+BatchMatrixSolveLs
+BatchSelfAdjointEig
+BatchSelfAdjointEigV2
+BatchSvd
+MatrixSolveLs
+SelfAdjointEig
+SelfAdjointEigV2
+Svd
 
 # logging_ops
 Assert
@@ -141,6 +148,7 @@ All
 Any
 BatchMatMul
 Complex
+Conj
 Max
 Mean
 Min
diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py
index 7c102390432..e45d7e69e6d 100644
--- a/tensorflow/python/ops/linalg_grad.py
+++ b/tensorflow/python/ops/linalg_grad.py
@@ -30,15 +30,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 
-ops.NoGradient("CholeskyGrad")
-ops.NoGradient("BatchCholeskyGrad")
-ops.NoGradient("SelfAdjointEig")
-ops.NoGradient("BatchSelfAdjointEig")
-ops.NoGradient("SelfAdjointEigV2")
-ops.NoGradient("BatchSelfAdjointEigV2")
-ops.NoGradient("Svd")
-ops.NoGradient("BatchSvd")
-
 
 @ops.RegisterGradient("MatrixInverse")
 def _MatrixInverseGrad(op, grad):
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index bd753c12ec3..f3db3229ee3 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import gen_linalg_ops
+from tensorflow.python.ops import math_ops
 # go/tf-wildcard-import
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_linalg_ops import *
@@ -179,11 +180,9 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None):
     output: Matrix of shape `[N, K]` containing the matrix that solves
       `matrix * output = rhs` in the least-squares sense.
   """
-  return gen_linalg_ops.matrix_solve_ls(matrix,
-                                        rhs,
-                                        l2_regularizer,
-                                        fast=fast,
-                                        name=name)
+  # pylint: disable=protected-access
+  return gen_linalg_ops._matrix_solve_ls(
+      matrix, rhs, l2_regularizer, fast=fast, name=name)
 
 
 def batch_matrix_solve_ls(matrix,
@@ -241,11 +240,9 @@ def batch_matrix_solve_ls(matrix,
       `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]` in the least
       squares sense.
   """
-  return gen_linalg_ops.batch_matrix_solve_ls(matrix,
-                                              rhs,
-                                              l2_regularizer,
-                                              fast=fast,
-                                              name=name)
+  # pylint: disable=protected-access
+  return gen_linalg_ops._batch_matrix_solve_ls(
+      matrix, rhs, l2_regularizer, fast=fast, name=name)
 
 
 def self_adjoint_eig(matrix, name=None):
@@ -263,7 +260,8 @@ def self_adjoint_eig(matrix, name=None):
     v: Eigenvectors. Shape is `[N, N]`. The columns contain the eigenvectors of
       `matrix`.
   """
-  e, v = gen_linalg_ops.self_adjoint_eig_v2(matrix, compute_v=True, name=name)
+  # pylint: disable=protected-access
+  e, v = gen_linalg_ops._self_adjoint_eig_v2(matrix, compute_v=True, name=name)
   return e, v
 
 
@@ -284,7 +282,8 @@ def batch_self_adjoint_eig(tensor, name=None):
     matrices
       contain eigenvectors of the corresponding matrices in `tensor`
   """
-  e, v = gen_linalg_ops.batch_self_adjoint_eig_v2(
+  # pylint: disable=protected-access
+  e, v = gen_linalg_ops._batch_self_adjoint_eig_v2(
       tensor, compute_v=True, name=name)
   return e, v
 
@@ -299,7 +298,8 @@ def self_adjoint_eigvals(matrix, name=None):
   Returns:
     e: Eigenvalues of `matrix`. Shape is `[N]`.
   """
-  e, _ = gen_linalg_ops.self_adjoint_eig_v2(matrix, compute_v=False, name=name)
+  # pylint: disable=protected-access
+  e, _ = gen_linalg_ops._self_adjoint_eig_v2(matrix, compute_v=False, name=name)
   return e
 
 
@@ -314,7 +314,8 @@ def batch_self_adjoint_eigvals(tensor, name=None):
     e: Eigenvalues. Shape is `[..., N]`. The vector `e[..., :]` contains the `N`
       eigenvalues of `tensor[..., :, :]`.
   """
-  e, _ = gen_linalg_ops.batch_self_adjoint_eig_v2(
+  # pylint: disable=protected-access
+  e, _ = gen_linalg_ops._batch_self_adjoint_eig_v2(
       tensor, compute_v=False, name=name)
   return e
 
@@ -353,13 +354,13 @@ def svd(matrix, compute_uv=True, full_matrices=False, name=None):
       shape is `[N, P]`. If `full_matrices` is `True` then shape is
       `[N, N]`. Not returned if `compute_uv` is `False`.
   """
-  s, u, v = gen_linalg_ops.svd(matrix,
-                               compute_uv=compute_uv,
-                               full_matrices=full_matrices)
+  # pylint: disable=protected-access
+  s, u, v = gen_linalg_ops._svd(
+      matrix, compute_uv=compute_uv, full_matrices=full_matrices)
   if compute_uv:
-    return s, u, v
+    return math_ops.real(s), u, v
   else:
-    return s
+    return math_ops.real(s)
 
 
 def batch_svd(tensor, compute_uv=True, full_matrices=False, name=None):
@@ -398,12 +399,13 @@ def batch_svd(tensor, compute_uv=True, full_matrices=False, name=None):
       shape is `[..., N, P]`. If `full_matrices` is `True` then shape is
       `[..., N, N]`. Not returned if `compute_uv` is `False`.
   """
-  s, u, v = gen_linalg_ops.batch_svd(
+  # pylint: disable=protected-access
+  s, u, v = gen_linalg_ops._batch_svd(
       tensor, compute_uv=compute_uv, full_matrices=full_matrices)
   if compute_uv:
-    return s, u, v
+    return math_ops.real(s), u, v
   else:
-    return s
+    return math_ops.real(s)
 
 
 # pylint: enable=invalid-name
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py
index 5bf65b88deb..03ee067b228 100644
--- a/tensorflow/python/ops/math_grad.py
+++ b/tensorflow/python/ops/math_grad.py
@@ -30,12 +30,6 @@ from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import math_ops
 
 
-# Gradient ops that do not have gradients themselves.
-ops.NoGradient("SigmoidGrad")
-ops.NoGradient("TanhGrad")
-ops.NoGradient("InvGrad")
-ops.NoGradient("RsqrtGrad")
-
 def _safe_shape_div(x, y):
   """Divides `x / y` assuming `x, y >= 0`, treating `0 / 0 = 0`."""
   return x // math_ops.maximum(y, 1)
@@ -127,13 +121,15 @@ def _ProdGrad(op, grad):
 
   # Pack all reduced dimensions into a single one, so we can perform the
   # cumprod ops. If the reduction dims list is empty, it defaults to float32,
-  # so we need to cast here.
-  reduced = math_ops.cast(reduction_indices, dtypes.int32)
-  idx = math_ops.range(0, array_ops.rank(op.inputs[0]))
-  other, _ = array_ops.listdiff(idx, reduced)
-  perm = array_ops.concat(0, [reduced, other])
-  reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
-  other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
+  # so we need to cast here.  We put all the shape-related ops on CPU to avoid
+  # copying back and forth, and since listdiff is CPU only.
+  with ops.device("/cpu:0"):
+    reduced = math_ops.cast(reduction_indices, dtypes.int32)
+    idx = math_ops.range(0, array_ops.rank(op.inputs[0]))
+    other, _ = array_ops.listdiff(idx, reduced)
+    perm = array_ops.concat(0, [reduced, other])
+    reduced_num = math_ops.reduce_prod(array_ops.gather(input_shape, reduced))
+    other_num = math_ops.reduce_prod(array_ops.gather(input_shape, other))
   permuted = array_ops.transpose(op.inputs[0], perm)
   permuted_shape = array_ops.shape(permuted)
   reshaped = array_ops.reshape(permuted, (reduced_num, other_num))
@@ -245,23 +241,34 @@ def _AbsGrad(op, grad):
 @ops.RegisterGradient("Neg")
 def _NegGrad(_, grad):
   """Returns -grad."""
-  return - grad
+  return -grad
 
 
 @ops.RegisterGradient("Inv")
 def _InvGrad(op, grad):
   """Returns -grad * (1 / x^2)."""
   y = op.outputs[0]  # y = 1 / x
+  # pylint: disable=protected-access
   return gen_math_ops._inv_grad(y, grad)
 
 
+@ops.RegisterGradient("InvGrad")
+def _InvGradGrad(op, grad):
+  b = op.inputs[1]
+  # op.outputs[0]: y = -b * conj(a)^2
+  with ops.control_dependencies([grad.op]):
+    ca = math_ops.conj(op.inputs[0])
+    cg = math_ops.conj(grad)
+    # pylint: disable=protected-access
+    return cg * -2.0 * b * ca, gen_math_ops._inv_grad(ca, grad)
+
+
 @ops.RegisterGradient("Square")
 def _SquareGrad(op, grad):
   x = op.inputs[0]
   # Added control dependencies to prevent 2*x from being computed too early.
   with ops.control_dependencies([grad.op]):
-    if x.dtype.is_complex:
-      x = math_ops.conj(x)
+    x = math_ops.conj(x)
     return grad * (2.0 * x)
 
 
@@ -274,9 +281,10 @@ def _SqrtGrad(op, grad):
 @ops.RegisterGradient("SqrtGrad")
 def _SqrtGradGrad(op, grad):
   a = op.inputs[0]
-  y = op.outputs[0]  # y = 0.5 * b / a
+  y = op.outputs[0]  # y = 0.5 * b / conj(a)
   with ops.control_dependencies([grad.op]):
-    return -grad * y / a, 0.5 * grad / a
+    ga = grad / a
+    return -math_ops.conj(ga) * y, 0.5 * ga
 
 
 @ops.RegisterGradient("Rsqrt")
@@ -290,8 +298,7 @@ def _ExpGrad(op, grad):
   """Returns grad * exp(x)."""
   y = op.outputs[0]  # y = e^x
   with ops.control_dependencies([grad.op]):
-    if y.dtype.is_complex:
-      y = math_ops.conj(y)
+    y = math_ops.conj(y)
     return grad * y
 
 
@@ -300,6 +307,7 @@ def _LogGrad(op, grad):
   """Returns grad * (1/x)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     return grad * math_ops.inv(x)
 
 
@@ -308,18 +316,28 @@ def _TanhGrad(op, grad):
   """Returns grad * (1 - tanh(x) * tanh(x))."""
   y = op.outputs[0]  # y = tanh(x)
   with ops.control_dependencies([grad.op]):
-    if y.dtype.is_complex:
-      y = math_ops.conj(y)
+    y = math_ops.conj(y)
+    # pylint: disable=protected-access
     return gen_math_ops._tanh_grad(y, grad)
 
 
+@ops.RegisterGradient("TanhGrad")
+def _TanhGradGrad(op, grad):
+  with ops.control_dependencies([grad.op]):
+    a = math_ops.conj(op.inputs[0])
+    b = math_ops.conj(op.inputs[1])
+    # pylint: disable=protected-access
+    return grad * -2.0 * b * a, gen_math_ops._tanh_grad(a, grad)
+
+
 @ops.RegisterGradient("Erf")
 def _ErfGrad(op, grad):
   """Returns grad * 2/sqrt(pi) * exp(-x**2)."""
   x = op.inputs[0]
   two_over_root_pi = constant_op.constant(2 / np.sqrt(np.pi), dtype=grad.dtype)
   with ops.control_dependencies([grad.op]):
-    return  grad * two_over_root_pi * math_ops.exp(-math_ops.square(x))
+    x = math_ops.conj(x)
+    return grad * two_over_root_pi * math_ops.exp(-math_ops.square(x))
 
 
 @ops.RegisterGradient("Erfc")
@@ -329,7 +347,8 @@ def _ErfcGrad(op, grad):
   minus_two_over_root_pi = constant_op.constant(-2 / np.sqrt(np.pi),
                                                 dtype=grad.dtype)
   with ops.control_dependencies([grad.op]):
-    return  grad * minus_two_over_root_pi * math_ops.exp(-math_ops.square(x))
+    x = math_ops.conj(x)
+    return grad * minus_two_over_root_pi * math_ops.exp(-math_ops.square(x))
 
 
 @ops.RegisterGradient("Lgamma")
@@ -337,6 +356,7 @@ def _LgammaGrad(op, grad):
   """Returns grad * digamma(x)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     return grad * math_ops.digamma(x)
 
 
@@ -345,6 +365,7 @@ def _DigammaGrad(op, grad):
   """Compute gradient of the digamma function with respect to its argument."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     return grad * math_ops.polygamma(array_ops.constant(1, dtype=x.dtype), x)
 
 
@@ -383,6 +404,8 @@ def _ZetaGrad(op, grad):
   unused_rx, rq = gen_array_ops._broadcast_gradient_args(sx, sq)
   # Evaluate gradient
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
+    q = math_ops.conj(q)
     partial_q = -x * math_ops.zeta(x + 1, q)
     return (None,
             array_ops.reshape(math_ops.reduce_sum(partial_q * grad, rq), sq))
@@ -400,6 +423,8 @@ def _PolygammaGrad(op, grad):
   unused_rn, rx = gen_array_ops._broadcast_gradient_args(sn, sx)
   # Evaluate gradient
   with ops.control_dependencies([grad.op]):
+    n = math_ops.conj(n)
+    x = math_ops.conj(x)
     partial_x = math_ops.polygamma(n + 1, x)
     return (None,
             array_ops.reshape(math_ops.reduce_sum(partial_x * grad, rx), sx))
@@ -410,11 +435,21 @@ def _SigmoidGrad(op, grad):
   """Returns grad * sigmoid(x) * (1 - sigmoid(x))."""
   y = op.outputs[0]  # y = sigmoid(x)
   with ops.control_dependencies([grad.op]):
-    if y.dtype.is_complex:
-      y = math_ops.conj(y)
+    y = math_ops.conj(y)
+    # pylint: disable=protected-access
     return gen_math_ops._sigmoid_grad(y, grad)
 
 
+@ops.RegisterGradient("SigmoidGrad")
+def _SigmoidGradGrad(op, grad):
+  with ops.control_dependencies([grad.op]):
+    a = math_ops.conj(op.inputs[0])
+    b = math_ops.conj(op.inputs[1])
+    gb = grad * b
+    # pylint: disable=protected-access
+    return gb - 2.0 * gb * a, gen_math_ops._sigmoid_grad(a, grad)
+
+
 @ops.RegisterGradient("Sign")
 def _SignGrad(op, _):
   """Returns 0."""
@@ -427,8 +462,7 @@ def _SinGrad(op, grad):
   """Returns grad * cos(x)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
-    if x.dtype.is_complex:
-      x = math_ops.conj(x)
+    x = math_ops.conj(x)
     return grad * math_ops.cos(x)
 
 
@@ -437,8 +471,7 @@ def _CosGrad(op, grad):
   """Returns grad * -sin(x)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
-    if x.dtype.is_complex:
-      x = math_ops.conj(x)
+    x = math_ops.conj(x)
     return -grad * math_ops.sin(x)
 
 
@@ -447,6 +480,7 @@ def _TanGrad(op, grad):
   """Returns grad * 1/sec^2(x)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     secx = math_ops.inv(math_ops.cos(x))
     secx2 = math_ops.square(secx)
     return grad * secx2
@@ -457,6 +491,7 @@ def _AsinGrad(op, grad):
   """Returns grad * 1/sqrt(1-x^2)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     x2 = math_ops.square(x)
     one = constant_op.constant(1, dtype=grad.dtype)
     den = math_ops.sqrt(math_ops.sub(one, x2))
@@ -469,6 +504,7 @@ def _AcosGrad(op, grad):
   """Returns grad * -1/sqrt(1-x^2)."""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     x2 = math_ops.square(x)
     one = constant_op.constant(1, dtype=grad.dtype)
     den = math_ops.sqrt(math_ops.sub(one, x2))
@@ -481,6 +517,7 @@ def _AtanGrad(op, grad):
   """Returns grad * 1/ (1 + x^2)"""
   x = op.inputs[0]
   with ops.control_dependencies([grad.op]):
+    x = math_ops.conj(x)
     x2 = math_ops.square(x)
     one = constant_op.constant(1, dtype=grad.dtype)
     inv = math_ops.inv(math_ops.add(one, x2))
@@ -525,9 +562,8 @@ def _MulGrad(op, grad):
   sx = array_ops.shape(x)
   sy = array_ops.shape(y)
   rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
-  if x.dtype.is_complex:
-    x = math_ops.conj(x)
-    y = math_ops.conj(y)
+  x = math_ops.conj(x)
+  y = math_ops.conj(y)
   return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx),
           array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy))
 
@@ -539,6 +575,8 @@ def _DivGrad(op, grad):
   sx = array_ops.shape(x)
   sy = array_ops.shape(y)
   rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)  # pylint: disable=protected-access
+  x = math_ops.conj(x)
+  y = math_ops.conj(y)
   return (array_ops.reshape(math_ops.reduce_sum(grad / y, rx), sx),
           array_ops.reshape(math_ops.reduce_sum(grad *
                                          (-x / math_ops.square(y)), ry), sy))
@@ -553,6 +591,9 @@ def _PowGrad(op, grad):
   sx = array_ops.shape(x)
   sy = array_ops.shape(y)
   rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
+  x = math_ops.conj(x)
+  y = math_ops.conj(y)
+  z = math_ops.conj(z)
   gx = array_ops.reshape(
       math_ops.reduce_sum(grad * y * math_ops.pow(x, y - 1), rx), sx)
   # Avoid false singularity at x = 0
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 2927031e6c2..861ff642245 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -148,18 +148,5 @@ class SegmentMinOrMaxGradientTest(tf.test.TestCase):
       self.assertLess(error, 1e-4)
 
 
-class SqrtGradGradTest(tf.test.TestCase):
-
-  def testSqrtGradGrad(self):
-    inputs_numpy = np.array([0.5, 1.0, 2.0])
-    inputs = tf.constant(inputs_numpy, dtype=tf.float32)
-    sqrt = tf.sqrt(inputs)
-    sqrt_grad = tf.gradients(sqrt, inputs)[0]
-    with self.test_session():
-      error = tf.test.compute_gradient_error(inputs, [3], sqrt_grad, [3],
-                                             x_init_value=inputs_numpy)
-      self.assertLess(error, 1e-4)
-
-
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index fa6af4740e0..859cc1175f7 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -523,16 +523,20 @@ def real(input, name=None):
   tf.real(input) ==> [-2.25, 3.25]
   ```
 
+  If `input` is already real, it is returned unchanged.
+
   Args:
-    input: A `Tensor`. Must be one of the following types: `complex64`,
-         `complex128`.
+    input: A `Tensor`. Must have numeric type.
     name: A name for the operation (optional).
 
   Returns:
     A `Tensor` of type `float32` or `float64`.
   """
   with ops.name_scope(name, "Real", [input]) as name:
-    return gen_math_ops.real(input, Tout=input.dtype.real_dtype, name=name)
+    real_dtype = input.dtype.real_dtype
+    if input.dtype.base_dtype == real_dtype:
+      return input
+    return gen_math_ops.real(input, Tout=real_dtype, name=name)
 
 
 def imag(input, name=None):
@@ -974,13 +978,15 @@ def range(start, limit=None, delta=1, name="range"):
   ```
 
   Args:
-    start: A 0-D (scalar) of type `int32`. First entry in sequence.
-      Defaults to 0.
+    start: A 0-D (scalar) of type `int32`. Acts as first entry in the range if
+      `limit` is not None; otherwise, acts as range limit and first entry
+      defaults to 0.
     limit: A 0-D (scalar) of type `int32`. Upper limit of sequence,
-      exclusive.
-    delta: A 0-D `Tensor` (scalar) of type `int32`. Optional. Default is 1.
-      Number that increments `start`.
-    name: A name for the operation (optional).
+      exclusive. If None, defaults to the value of `start` while the first
+      entry of the range defaults to 0.
+    delta: A 0-D `Tensor` (scalar) of type `int32`. Number that increments
+      `start`. Defaults to 1.
+    name: A name for the operation. Defaults to "range".
 
   Returns:
     An 1-D `int32` `Tensor`.
@@ -1724,6 +1730,43 @@ def cumprod(x, axis=0, exclusive=False, reverse=False, name=None):
         x, axis, exclusive=exclusive, reverse=reverse, name=name)
 
 
+def conj(x, name=None):
+  r"""Returns the complex conjugate of a complex number.
+
+  Given a tensor `x` of complex numbers, this operation returns a tensor of
+  complex numbers that are the complex conjugate of each element in `x`. The
+  complex numbers in `x` must be of the form \\(a + bj\\), where *a* is the
+  real part and *b* is the imaginary part.
+
+  The complex conjugate returned by this operation is of the form \\(a - bj\\).
+
+  For example:
+
+      # tensor 'x' is [-2.25 + 4.75j, 3.25 + 5.75j]
+      tf.conj(x) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+
+  If `x` is real, it is returned unchanged.
+
+  Args:
+    x: `Tensor` to conjugate.  Must have numeric type.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `Tensor` that is the conjugate of `x` (with the same type).
+
+  Raises:
+    TypeError: If `x` is not a numeric tensor.
+  """
+  with ops.name_scope(name, "Conj", [x]) as name:
+    x = ops.convert_to_tensor(x, name="x")
+    if x.dtype.is_complex:
+      return gen_math_ops._conj(x, name=name)
+    elif x.dtype.is_floating or x.dtype.is_integer:
+      return x
+    else:
+      raise TypeError("Expected numeric tensor, got dtype %r" % x.dtype)
+
+
 ops.RegisterShape("Abs")(common_shapes.unchanged_shape)
 ops.RegisterShape("Acos")(common_shapes.unchanged_shape)
 ops.RegisterShape("Asin")(common_shapes.unchanged_shape)
@@ -1802,7 +1845,6 @@ ops.RegisterShape("Sub")(common_shapes.call_cpp_shape_fn)
 ops.RegisterShape("SquaredDifference")(common_shapes.call_cpp_shape_fn)
 
 
-# TODO(cwhipkey): inline body into callers.
 def _BroadcastShape(op):
   """Common shape function for binary operators that broadcast their inputs."""
   return [common_shapes.broadcast_shape(
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 6222638bbe2..4ce6de033c7 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -844,6 +844,71 @@ ops.RegisterShape("AvgPool")(common_shapes.avg_pool_shape)
 ops.RegisterShape("MaxPool")(common_shapes.max_pool_shape)
 
 
+@ops.RegisterShape("FusedResizeAndPadConv2D")
+def _FusedResizeAndPadConv2DShape(op):
+  """Shape function for FusedResizeAndPadConv2D op."""
+  # The bilinear resize shape calculation.
+  input_shape = op.inputs[0].get_shape().with_rank(4)
+  unused_size_shape = op.inputs[1].get_shape().merge_with([2])
+  size = tensor_util.constant_value(op.inputs[1])
+  if size is not None:
+    height = size[0]
+    width = size[1]
+  else:
+    height = None
+    width = None
+  resized_shape = tensor_shape.TensorShape(
+      [input_shape[0], height, width, input_shape[3]])
+
+  # Calculates the effect of the padding.
+  paddings_shape = op.inputs[2].get_shape().with_rank(2)
+  resized_shape = resized_shape.with_rank(paddings_shape[0].value)
+  paddings_shape = paddings_shape.merge_with(
+      tensor_shape.matrix(resized_shape.ndims, 2))
+  paddings = tensor_util.constant_value(op.inputs[2])
+  if paddings is None:
+    padded_shape = tensor_shape.unknown_shape(ndims=resized_shape.ndims)
+  else:
+    output_dims = []
+    for i, dim in enumerate(resized_shape.dims):
+      if paddings[i, 0] < 0 or paddings[i, 1] < 0:
+        raise ValueError("paddings must be non-negative")
+      output_dims.append(dim + paddings[i, 0] + paddings[i, 1])
+    padded_shape = tensor_shape.TensorShape(output_dims)
+
+  # Finally work out the convolution's effect.
+  filter_shape = op.inputs[3].get_shape().with_rank(4)
+
+  batch_size = padded_shape[0]
+  in_rows = padded_shape[1]
+  in_cols = padded_shape[2]
+
+  filter_rows = filter_shape[0]
+  filter_cols = filter_shape[1]
+  depth_out = filter_shape[3]
+  # Check that the input depths are compatible.
+  padded_shape[3].assert_is_compatible_with(filter_shape[2])
+
+  stride_b, stride_r, stride_c, stride_d = op.get_attr("strides")
+
+  if stride_b != 1 or stride_d != 1:
+    raise ValueError("Current implementation does not yet support "
+                     "strides in the batch and depth dimensions.")
+  # TODO(mrry,shlens): Raise an error if the stride would cause
+  # information in the input to be ignored. This will require a change
+  # in the kernel implementation.
+  padding = op.get_attr("padding")
+  out_rows, out_cols = common_shapes.get2d_conv_output_size(in_rows, in_cols,
+                                                            filter_rows,
+                                                            filter_cols,
+                                                            stride_r,
+                                                            stride_c,
+                                                            padding)
+
+  output_shape = [batch_size, out_rows, out_cols, depth_out]
+  return [tensor_shape.TensorShape(output_shape)]
+
+
 @ops.RegisterShape("MaxPoolWithArgmax")
 def _MaxPoolWithArgMaxShape(op):
   """Shape function for MaxPoolWithArgmax op."""
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index 965ebd904f6..8d565f51777 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -1016,7 +1016,7 @@ def _dynamic_rnn_loop(cell,
   return (final_outputs, final_state)
 
 
-def raw_rnn(cell, loop_fn, initial_state,
+def raw_rnn(cell, loop_fn,
             parallel_iterations=None, swap_memory=False, scope=None):
   """Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`.
 
@@ -1034,16 +1034,18 @@ def raw_rnn(cell, loop_fn, initial_state,
   `TensorArray` objects directly.
 
   The operation of `raw_rnn`, in pseudo-code, is basically the following:
+
   ```
-  emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
   time = tf.constant(0, dtype=tf.int32)
-  (finished, next_input, _, loop_state) = loop_fn(
-      time=time, cell_output=None, loop_state=None)
+  (finished, next_input, initial_state, _, loop_state) = loop_fn(
+      time=time, cell_output=None, cell_state=None, loop_state=None)
+  emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
   state = initial_state
   while not all(finished):
-    (output, next_state) = cell(next_input, state)
-    (next_finished, next_input, emit, loop_state) = loop_fn(
-        time=time + 1, cell_output=output, loop_state=loop_state)
+    (output, cell_state) = cell(next_input, state)
+    (next_finished, next_input, next_state, emit, loop_state) = loop_fn(
+        time=time + 1, cell_output=output, cell_state=cell_state,
+        loop_state=loop_state)
     # Emit zeros and copy forward state for minibatch entries that are finished.
     state = tf.select(finished, state, next_state)
     emit = tf.select(finished, tf.zeros_like(emit), emit)
@@ -1067,8 +1069,14 @@ def raw_rnn(cell, loop_fn, initial_state,
   inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
   inputs_ta = inputs_ta.unpack(inputs)
 
-  def loop_fn(time, cell_output, loop_state):
+  cell = tf.nn.rnn_cell.LSTMCell(num_units)
+
+  def loop_fn(time, cell_output, cell_state, loop_state):
     emit_output = cell_output  # == None for time == 0
+    if cell_output is None:  # time == 0
+      next_cell_state = cell.zero_state(batch_size, tf.float32)
+    else:
+      next_cell_state = cell_state
     elements_finished = (time >= sequence_length)
     finished = tf.reduce_all(elements_finished)
     next_input = tf.cond(
@@ -1076,35 +1084,55 @@ def raw_rnn(cell, loop_fn, initial_state,
         lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32),
         lambda: inputs_ta.read(time))
     next_loop_state = None
-    return (elements_finished, next_input, emit_output, next_loop_state)
+    return (elements_finished, next_input, next_cell_state,
+            emit_output, next_loop_state)
 
-  cell = tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True)
-  initial_state = cell.zero_state(batch_size, tf.float32)
-  outputs_ta, final_state, _ = raw_rnn(cell, loop_fn, initial_state)
+  outputs_ta, final_state, _ = raw_rnn(cell, loop_fn)
   outputs = outputs_ta.pack()
   ```
 
   Args:
     cell: An instance of RNNCell.
-    loop_fn: A callable that takes inputs `(time, cell_output, loop_state)` and
-      returns the tuple `(finished, next_input, emit_output, next_loop_state)`.
+    loop_fn: A callable that takes inputs
+      `(time, cell_output, cell_state, loop_state)`
+      and returns the tuple
+      `(finished, next_input, next_cell_state, emit_output, next_loop_state)`.
       Here `time` is an int32 scalar `Tensor`, `cell_output` is a
       `Tensor` or (possibly nested) tuple of tensors as determined by
-      `cell.output_size`.  In addition, `finished` is a boolean `Tensor` of
-      shape `[batch_size]`, `next_input` is the next input to feed to `cell`,
-      and `emit_output` is the output to store for this iteration.  Note that
-      `emit_output` should be a `Tensor` or (possibly nested) tuple of tensors
-      with shapes and structure matching `cell.output_size` and `cell_output`
-      above.  The parameter `loop_state` and output `next_loop_state` may be
-      either a single or (possibly nested) tuple of tensors.  This paramter
+      `cell.output_size`, and `cell_state` is a `Tensor`
+      or (possibly nested) tuple of tensors, as determined by the `loop_fn`
+      on its first call (and should match `cell.state_size`).
+      The outputs are: `finished`: a boolean `Tensor` of
+      shape `[batch_size]`; `next_input`: the next input to feed to `cell`;
+      `next_cell_state`: the next state to feed to `cell`;
+      and `emit_output`: the output to store for this iteration.
+
+      Note that `emit_output` should be a `Tensor` or (possibly nested)
+      tuple of tensors with shapes and structure matching `cell.output_size`
+      and `cell_output` above.  The parameter `cell_state` and output
+      `next_cell_state` may be either a single or (possibly nested) tuple
+      of tensors.  The parameter `loop_state` and
+      output `next_loop_state` may be either a single or (possibly nested) tuple
+      of `Tensor` and `TensorArray` objects.  This last parameter
       may be ignored by `loop_fn` and the return value may be `None`.  If it
       is not `None`, then the `loop_state` will be propagated through the RNN
       loop, for use purely by `loop_fn` to keep track of its own state.
       The `next_loop_state` parameter returned may be `None`.
 
       The first call to `loop_fn` will be `time = 0`, `cell_output = None`,
-      and `loop_state = None`.  Its `emit_output` value in this case may be
-      either `None` or a (possibly nested) tuple structure of Tensors, e.g.,
+      `cell_state = None`, and `loop_state = None`.  For this call:
+      The `next_cell_state` value should be the value with which to initialize
+      the cell's state.  It may be a final state from a previous RNN or it
+      may be the output of `cell.zero_state()`.  It should be a
+      (possibly nested) tuple structure of tensors.
+      If `cell.state_size` is an integer, this must be
+      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
+      If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
+      appropriate type and shape `[batch_size] + cell.state_size`.
+      If `cell.state_size` is a (possibly nested) tuple of ints or
+      `TensorShape`, this will be a tuple having the corresponding shapes.
+      The `emit_output` value may be either `None` or a (possibly nested)
+      tuple structure of tensors, e.g.,
       `(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`.
       If this first `emit_output` return value is `None`,
       then the `emit_ta` result of `raw_rnn` will have the same structure and
@@ -1114,13 +1142,6 @@ def raw_rnn(cell, loop_fn, initial_state,
       initializing call are ignored.  Note, this emit structure must be
       consistent across all time steps.
 
-    initial_state: An initial state for the RNN.
-      If `cell.state_size` is an integer, this must be
-      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
-      If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of
-      appropriate type and shape `[batch_size] + cell.state_size`.
-      If `cell.state_size` is a (possibly nested) tuple of ints or
-      `TensorShape`, this will be a tuple having the corresponding shapes.
     parallel_iterations: (Default: 32).  The number of iterations to run in
       parallel.  Those operations which do not have any temporal dependency
       and can be run in parallel, will be.  This parameter trades off
@@ -1135,26 +1156,25 @@ def raw_rnn(cell, loop_fn, initial_state,
   Returns:
     A tuple `(emit_ta, final_state, final_loop_state)` where:
 
-      `emit_ta`: The RNN output `TensorArray`.
-         If `loop_fn` returns a (possibly nested) set of Tensors for
-         `emit_output` during initialization, (inputs `time = 0`,
-         `cell_output = None`, and `loop_state = None`), then `emit_ta` will
-         have the same structure, dtypes, and shapes as `emit_output` instead.
-         If `loop_fn` returns `emit_output = None` during this call,
-         the structure of `cell.output_size` is used:
+    `emit_ta`: The RNN output `TensorArray`.
+       If `loop_fn` returns a (possibly nested) set of Tensors for `emit_output`
+       during initialization (inputs `time = 0`, `cell_output = None`,
+       `cell_state = None`, and `loop_state = None`), then `emit_ta` will
+       have the same structure, dtypes, and shapes as `emit_output` instead.
+       If `loop_fn` returns `emit_output = None` during this call,
+       the structure of `cell.output_size` is used:
+       If `cell.output_size` is a (possibly nested) tuple of integers
+       or `TensorShape` objects, then `emit_ta` will be a tuple having the
+       same structure as `cell.output_size`, containing TensorArrays whose
+       elements' shapes correspond to the shape data in `cell.output_size`.
 
-         If `cell.output_size` is a (possibly nested) tuple of integers
-         or `TensorShape` objects, then `emit_ta` will be a tuple having the
-         same structure as `cell.output_size`, containing TensorArrays whose
-         elements' shapes correspond to the shape data in `cell.output_size`.
+    `final_state`: The final cell state.  If `cell.state_size` is an int, this
+      will be shaped `[batch_size, cell.state_size]`.  If it is a
+      `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
+      If it is a (possibly nested) tuple of ints or `TensorShape`, this will
+      be a tuple having the corresponding shapes.
 
-      `final_state`: The final cell state.  If `cell.state_size` is an int, this
-        will be shaped `[batch_size, cell.state_size]`.  If it is a
-        `TensorShape`, this will be shaped `[batch_size] + cell.state_size`.
-        If it is a (possibly nested) tuple of ints or `TensorShape`, this will
-        be a tuple having the corresponding shapes.
-
-      `final_loop_state`: The final loop state as returned by `loop_fn`.
+    `final_loop_state`: The final loop state as returned by `loop_fn`.
 
   Raises:
     TypeError: If `cell` is not an instance of RNNCell, or `loop_fn` is not
@@ -1176,8 +1196,9 @@ def raw_rnn(cell, loop_fn, initial_state,
       varscope.set_caching_device(lambda op: op.device)
 
     time = constant_op.constant(0, dtype=dtypes.int32)
-    (elements_finished, next_input, emit_structure, init_loop_state) = loop_fn(
-        time, None, None)  # time, cell_output, loop_state
+    (elements_finished, next_input, initial_state, emit_structure,
+     init_loop_state) = loop_fn(
+         time, None, None, None)  # time, cell_output, cell_state, loop_state
     flat_input = nest.flatten(next_input)
 
     # Need a surrogate loop state for the while_loop if none is available.
@@ -1243,15 +1264,17 @@ def raw_rnn(cell, loop_fn, initial_state,
       Returns:
         Tuple having the same size as Args but with updated values.
       """
-      (next_output, next_state) = cell(current_input, state)
+      (next_output, cell_state) = cell(current_input, state)
 
-      nest.assert_same_structure(state, next_state)
+      nest.assert_same_structure(state, cell_state)
       nest.assert_same_structure(cell.output_size, next_output)
 
       next_time = time + 1
-      (next_finished, next_input, emit_output, next_loop_state) = loop_fn(
-          next_time, next_output, loop_state)
+      (next_finished, next_input, next_state, emit_output,
+       next_loop_state) = loop_fn(
+           next_time, next_output, cell_state, loop_state)
 
+      nest.assert_same_structure(state, next_state)
       nest.assert_same_structure(current_input, next_input)
       nest.assert_same_structure(emit_ta, emit_output)
 
diff --git a/tensorflow/python/tools/optimize_for_inference.py b/tensorflow/python/tools/optimize_for_inference.py
index a330ff7c508..9c115f53bec 100644
--- a/tensorflow/python/tools/optimize_for_inference.py
+++ b/tensorflow/python/tools/optimize_for_inference.py
@@ -27,6 +27,8 @@ the network is used only for inference. These include:
 
  - Folding batch normalization ops into the pre-calculated weights.
 
+ - Fusing common operations into unified versions.
+
 This script takes a frozen GraphDef file (where the weight variables have been
 converted into constants by the freeze_graph script) and outputs a new GraphDef
 with the optimizations applied.
diff --git a/tensorflow/python/tools/optimize_for_inference_lib.py b/tensorflow/python/tools/optimize_for_inference_lib.py
index 4eb138d97d9..1cb5ba16256 100644
--- a/tensorflow/python/tools/optimize_for_inference_lib.py
+++ b/tensorflow/python/tools/optimize_for_inference_lib.py
@@ -27,6 +27,8 @@ the network is used only for inference. These include:
 
  - Folding batch normalization ops into the pre-calculated weights.
 
+ - Fusing common operations into unified versions.
+
 This script takes a frozen GraphDef file (where the weight variables have been
 converted into constants by the freeze_graph script) and outputs a new GraphDef
 with the optimizations applied.
@@ -37,8 +39,8 @@ bazel build tensorflow/python/tools:optimize_for_inference && \
 bazel-bin/tensorflow/python/tools/optimize_for_inference \
 --input_graph=some_graph_def.pb \
 --output_graph=/tmp/optimized_graph.pb \
---input_node_names=Mul
---output_node_names=softmax
+--input_names=Mul \
+--output_names=softmax
 
 """
 
@@ -74,13 +76,42 @@ def optimize_for_inference(input_graph_def, input_node_names,
   Returns:
     An optimized version of the input graph.
   """
-  stripped_graph_def = strip_unused_lib.strip_unused(input_graph_def,
-                                                     input_node_names,
-                                                     output_node_names,
-                                                     placeholder_type_enum)
-  detrained_graph_def = graph_util.remove_training_nodes(stripped_graph_def)
-  folded_graph_def = fold_batch_norms(detrained_graph_def)
-  return folded_graph_def
+  ensure_graph_is_valid(input_graph_def)
+  optimized_graph_def = input_graph_def
+  optimized_graph_def = strip_unused_lib.strip_unused(optimized_graph_def,
+                                                      input_node_names,
+                                                      output_node_names,
+                                                      placeholder_type_enum)
+  optimized_graph_def = graph_util.remove_training_nodes(optimized_graph_def)
+  optimized_graph_def = fold_batch_norms(optimized_graph_def)
+  optimized_graph_def = fuse_resize_and_conv(optimized_graph_def)
+  ensure_graph_is_valid(optimized_graph_def)
+  return optimized_graph_def
+
+
+def ensure_graph_is_valid(graph_def):
+  """Makes sure that the graph is internally consistent.
+
+  Raises an exception if two nodes share a name, or if any node lists an input
+  whose producing node is not present in the graph.
+
+  Args:
+    graph_def: Definition of a graph to be checked.
+
+  Raises:
+    ValueError: If a node name is duplicated or an input node is missing.
+  """
+  node_map = {}  # Maps each node name to its NodeDef.
+  for node in graph_def.node:
+    if node.name not in node_map:
+      node_map[node.name] = node
+    else:
+      raise ValueError("Duplicate node names detected for %s" % node.name)
+  for node in graph_def.node:
+    for input_name in node.input:
+      input_node_name = node_name_from_input(input_name)
+      if input_node_name not in node_map:
+        raise ValueError("Input for %s not found: %s" % (node.name, input_name))
 
 
 def node_name_from_input(node_name):
@@ -161,7 +192,7 @@ def fold_batch_norms(input_graph_def):
     if node.name not in input_node_map.keys():
       input_node_map[node.name] = node
     else:
-      raise ValueError("Duplicate node names detected.")
+      raise ValueError("Duplicate node names detected for ", node.name)
 
   nodes_to_skip = {}
   new_ops = []
@@ -303,3 +334,94 @@ def fold_batch_norms(input_graph_def):
 
   result_graph_def.node.extend(new_ops)
   return result_graph_def
+
+
+def fuse_resize_and_conv(input_graph_def):
+  """Merges preceding resize and mirror pad ops into a specialized convolution.
+
+  There's a common pattern of enlarging the input to a convolution using a
+  resize operation, and also using MirrorPad to extend the boundaries so that
+  zero edge pixels don't bleed inwards when convolving. This routine looks for
+  that pattern of operations, and fuses them into a FusedResizeAndPadConv2D op.
+
+  Args:
+    input_graph_def: A GraphDef containing a model.
+
+  Returns:
+    Modified graph with resize and pad ops merged.
+
+  Raises:
+    ValueError: If the graph is badly formed with duplicate node names.
+  """
+
+  input_node_map = {}  # Maps each node name to its NodeDef.
+  for node in input_graph_def.node:
+    if node.name not in input_node_map:
+      input_node_map[node.name] = node
+    else:
+      raise ValueError("Duplicate node names detected for %s" % node.name)
+
+  nodes_to_skip = {}  # Names of original nodes replaced by a fused op.
+  new_ops = []
+  for node in input_graph_def.node:
+
+    if node.op != "Conv2D":
+      continue
+    conv_op = node
+
+    input_op = node_from_map(input_node_map, conv_op.input[0])
+    if input_op.op == "MirrorPad":
+      mirror_pad_op = input_op
+      resize_op = node_from_map(input_node_map, mirror_pad_op.input[0])
+    else:
+      mirror_pad_op = None
+      resize_op = input_op
+
+    if resize_op.op != "ResizeBilinear":
+      continue
+
+    nodes_to_skip[conv_op.name] = True
+    if mirror_pad_op:
+      nodes_to_skip[mirror_pad_op.name] = True
+    nodes_to_skip[resize_op.name] = True
+
+    fused_conv_op = tf.NodeDef()
+    fused_conv_op.op = "FusedResizeAndPadConv2D"
+    fused_conv_op.name = conv_op.name  # Keeps consumers of the conv wired up.
+    if mirror_pad_op:
+      mirror_paddings_name = mirror_pad_op.input[1]
+      mirror_paddings_mode = mirror_pad_op.attr["mode"]
+    else:
+      # If there was no MirrorPad op, then create settings that make the padding
+      # stage of the fused operation a no-op.
+      paddings_op = tf.NodeDef()
+      paddings_op.op = "Const"
+      paddings_op.name = conv_op.name + "_dummy_paddings"
+      paddings_op.attr["dtype"].CopyFrom(tf.AttrValue(
+          type=tf.int32.as_datatype_enum))
+      paddings_op.attr["value"].CopyFrom(tf.AttrValue(
+          tensor=tensor_util.make_tensor_proto(
+              [0, 0, 0, 0, 0, 0, 0, 0], tf.int32, [4, 2])))
+      new_ops.extend([paddings_op])
+      mirror_paddings_name = paddings_op.name
+      mirror_paddings_mode = tf.AttrValue(s=b"REFLECT")
+    fused_conv_op.input.extend([resize_op.input[0], resize_op.input[1],
+                                mirror_paddings_name, conv_op.input[1]])
+    fused_conv_op.attr["T"].CopyFrom(conv_op.attr["T"])
+    fused_conv_op.attr["resize_align_corners"].CopyFrom(
+        resize_op.attr["align_corners"])
+    fused_conv_op.attr["mode"].CopyFrom(mirror_paddings_mode)
+    fused_conv_op.attr["strides"].CopyFrom(conv_op.attr["strides"])
+    fused_conv_op.attr["padding"].CopyFrom(conv_op.attr["padding"])
+    new_ops.extend([fused_conv_op])
+
+  result_graph_def = tf.GraphDef()
+  for node in input_graph_def.node:
+    if node.name in nodes_to_skip:
+      continue
+    new_node = tf.NodeDef()
+    new_node.CopyFrom(node)
+    result_graph_def.node.extend([new_node])
+
+  result_graph_def.node.extend(new_ops)
+  return result_graph_def
diff --git a/tensorflow/python/tools/optimize_for_inference_test.py b/tensorflow/python/tools/optimize_for_inference_test.py
index 61644fe9c91..d92d7ab8c7d 100644
--- a/tensorflow/python/tools/optimize_for_inference_test.py
+++ b/tensorflow/python/tools/optimize_for_inference_test.py
@@ -54,6 +54,7 @@ class OptimizeForInferenceTest(tf.test.TestCase):
                                              shape=shape)))
 
   def testOptimizeForInference(self):
+    unused_constant_name = "unused_constant"
     unconnected_add_name = "unconnected_add"
     a_constant_name = "a_constant"
     b_constant_name = "b_constant"
@@ -64,9 +65,14 @@ class OptimizeForInferenceTest(tf.test.TestCase):
     add_name = "add"
     unused_output_add_name = "unused_output_add"
     graph_def = tf.GraphDef()
+    unused_constant = self.create_constant_node_def(unused_constant_name,
+                                                    value=0,
+                                                    dtype=tf.float32,
+                                                    shape=[])
+    graph_def.node.extend([unused_constant])
     unconnected_add_node = self.create_node_def("Add", unconnected_add_name,
-                                                ["no_such_node",
-                                                 "no_such_node"])
+                                                [unused_constant_name,
+                                                 unused_constant_name])
     self.set_attr_dtype(unconnected_add_node, "T", tf.float32)
     graph_def.node.extend([unconnected_add_node])
     a_constant = self.create_constant_node_def(a_constant_name,
@@ -160,6 +166,65 @@ class OptimizeForInferenceTest(tf.test.TestCase):
     for node in optimized_graph_def.node:
       self.assertNotEqual("BatchNormWithGlobalNormalization", node.op)
 
+  def testFuseResizePadAndConv(self):
+    with self.test_session() as sess:
+      inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
+      input_op = tf.constant(np.array(inputs), shape=[1, 2, 3, 2],
+                             dtype=tf.float32)
+      resize_op = tf.image.resize_bilinear(input_op, [12, 4],
+                                           align_corners=False)
+      pad_op = tf.pad(resize_op, [[0, 0], [1, 1], [2, 2], [0, 0]],
+                      mode="REFLECT")
+      weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4]
+      weights_op = tf.constant(np.array(weights), shape=[1, 2, 2, 2],
+                               dtype=tf.float32)
+      tf.nn.conv2d(pad_op, weights_op, [1, 1, 1, 1],
+                   padding="VALID", name="output")
+      original_graph_def = sess.graph_def
+      original_result = sess.run(["output:0"])
+    optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
+        original_graph_def)
+
+    with self.test_session() as sess:
+      _ = tf.import_graph_def(optimized_graph_def, input_map={},
+                              name="optimized")
+      optimized_result = sess.run(["optimized/output:0"])
+
+    self.assertAllClose(original_result, optimized_result)
+
+    for node in optimized_graph_def.node:
+      self.assertNotEqual("Conv2D", node.op)
+      self.assertNotEqual("MirrorPad", node.op)
+      self.assertNotEqual("ResizeBilinear", node.op)
+
+  def testFuseResizeAndConv(self):
+    with self.test_session() as sess:
+      inputs = [1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6]
+      input_op = tf.constant(np.array(inputs), shape=[1, 2, 3, 2],
+                             dtype=tf.float32)
+      resize_op = tf.image.resize_bilinear(input_op, [12, 4],
+                                           align_corners=False)
+      weights = [1, 2, 3, 4, 0.1, 0.2, 0.3, 0.4]
+      weights_op = tf.constant(np.array(weights), shape=[1, 2, 2, 2],
+                               dtype=tf.float32)
+      tf.nn.conv2d(resize_op, weights_op, [1, 1, 1, 1],
+                   padding="VALID", name="output")
+      original_graph_def = sess.graph_def
+      original_result = sess.run(["output:0"])
+    optimized_graph_def = optimize_for_inference_lib.fuse_resize_and_conv(
+        original_graph_def)
+
+    with self.test_session() as sess:
+      _ = tf.import_graph_def(optimized_graph_def, input_map={},
+                              name="optimized")
+      optimized_result = sess.run(["optimized/output:0"])
+
+    self.assertAllClose(original_result, optimized_result)
+
+    for node in optimized_graph_def.node:
+      self.assertNotEqual("Conv2D", node.op)
+      self.assertNotEqual("ResizeBilinear", node.op)
+
 
 if __name__ == "__main__":
   tf.test.main()
diff --git a/tensorflow/python/tools/strip_unused_lib.py b/tensorflow/python/tools/strip_unused_lib.py
index c9d72ccd391..ded6e1da628 100644
--- a/tensorflow/python/tools/strip_unused_lib.py
+++ b/tensorflow/python/tools/strip_unused_lib.py
@@ -51,6 +51,9 @@ def strip_unused(input_graph_def, input_node_names, output_node_names,
       placeholder_node.name = node.name
       placeholder_node.attr["dtype"].CopyFrom(tf.AttrValue(
           type=placeholder_type_enum))
+      if "_output_shapes" in node.attr:
+        placeholder_node.attr["_output_shapes"].CopyFrom(
+            node.attr["_output_shapes"])
       inputs_replaced_graph_def.node.extend([placeholder_node])
     else:
       inputs_replaced_graph_def.node.extend([copy.deepcopy(node)])
diff --git a/tensorflow/python/tools/strip_unused_test.py b/tensorflow/python/tools/strip_unused_test.py
index 6186d391ec6..a5eeda9d9bd 100644
--- a/tensorflow/python/tools/strip_unused_test.py
+++ b/tensorflow/python/tools/strip_unused_test.py
@@ -71,6 +71,8 @@ class StripUnusedTest(test_util.TensorFlowTestCase):
       for node in output_graph_def.node:
         self.assertNotEqual("Add", node.op)
         self.assertNotEqual("Sub", node.op)
+        if node.name == input_node_names:
+          self.assertTrue("shape" in node.attr)
 
       with tf.Session() as sess:
         input_node = sess.graph.get_tensor_by_name("wanted_input_node:0")
diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py
index d5d71d66066..fd6f806f25c 100644
--- a/tensorflow/python/training/input.py
+++ b/tensorflow/python/training/input.py
@@ -519,7 +519,7 @@ def batch(tensors, batch_size, num_threads=1, capacity=32,
 
   If `enqueue_many` is `True`, `tensors` is assumed to represent a batch of
   examples, where the first dimension is indexed by example, and all members of
-  `tensor_list` should have the same size in the first dimension.  If an input
+  `tensors` should have the same size in the first dimension.  If an input
   tensor has shape `[*, x, y, z]`, the output will have shape `[batch_size, x,
   y, z]`.  The `capacity` argument controls the how long the prefetching is
   allowed to grow the queues.
@@ -553,11 +553,11 @@ def batch(tensors, batch_size, num_threads=1, capacity=32,
   Args:
     tensors: The list or dictionary of tensors to enqueue.
     batch_size: The new batch size pulled from the queue.
-    num_threads: The number of threads enqueuing `tensor_list`.
+    num_threads: The number of threads enqueuing `tensors`.
     capacity: An integer. The maximum number of elements in the queue.
-    enqueue_many: Whether each tensor in `tensor_list` is a single example.
+    enqueue_many: Whether each tensor in `tensors` is a single example.
     shapes: (Optional) The shapes for each example.  Defaults to the
-      inferred shapes for `tensor_list`.
+      inferred shapes for `tensors`.
     dynamic_pad: Boolean.  Allow variable dimensions in input shapes.
       The given dimensions are padded upon dequeue so that tensors within a
       batch have the same shapes.
diff --git a/tensorflow/tensorboard/TAG b/tensorflow/tensorboard/TAG
index 9902f17848a..f04c001f3f7 100644
--- a/tensorflow/tensorboard/TAG
+++ b/tensorflow/tensorboard/TAG
@@ -1 +1 @@
-28
+29
diff --git a/tensorflow/tensorboard/backend/handler.py b/tensorflow/tensorboard/backend/handler.py
index 1c1e9411a4d..d39d66bf97c 100644
--- a/tensorflow/tensorboard/backend/handler.py
+++ b/tensorflow/tensorboard/backend/handler.py
@@ -516,32 +516,36 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler):
       path: The path of the static file, relative to the tensorboard/ directory.
     """
     # Strip off the leading forward slash.
-    path = path.lstrip('/')
-    if not self._path_is_safe(path):
-      logging.info('path %s not safe, sending 404', path)
+    orig_path = path.lstrip('/')
+    if not self._path_is_safe(orig_path):
+      logging.info('path %s not safe, sending 404', orig_path)
       # Traversal attack, so 404.
       self.send_error(404)
       return
-
-    if path.startswith('external'):
+    # Resource loader wants a path relative to //WORKSPACE/tensorflow.
+    path = os.path.join('tensorboard', orig_path)
+    # Open the file and read it.
+    try:
+      contents = resource_loader.load_resource(path)
+    except IOError:
       # For compatibility with latest version of Bazel, we renamed bower
       # packages to use '_' rather than '-' in their package name.
       # This means that the directory structure is changed too.
       # So that all our recursive imports work, we need to modify incoming
       # requests to map onto the new directory structure.
+      path = orig_path
       components = path.split('/')
-      components[1] = components[1].replace('-', '_')
+      components[0] = components[0].replace('-', '_')
       path = ('/').join(components)
-      path = os.path.join('../', path)
-    else:
-      path = os.path.join('tensorboard', path)
-    # Open the file and read it.
-    try:
-      contents = resource_loader.load_resource(path)
-    except IOError:
-      logging.info('path %s not found, sending 404', path)
-      self.send_error(404)
-      return
+      # Bazel keeps all the external dependencies in //WORKSPACE/external,
+      # and resource loader wants a path relative to //WORKSPACE/tensorflow/.
+      path = os.path.join('../external', path)
+      try:
+        contents = resource_loader.load_resource(path)
+      except IOError:
+        logging.info('path %s not found, sending 404', path)
+        self.send_error(404)
+        return
     mimetype, encoding = mimetypes.guess_type(path)
     mimetype = mimetype or 'application/octet-stream'
     self._respond(contents, mimetype, encoding=encoding)
diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/categorizer.ts b/tensorflow/tensorboard/components/tf-dashboard-common/categorizer.ts
index 5131b108e4b..eba1b0f7f44 100644
--- a/tensorflow/tensorboard/components/tf-dashboard-common/categorizer.ts
+++ b/tensorflow/tensorboard/components/tf-dashboard-common/categorizer.ts
@@ -49,25 +49,11 @@ module Categorizer {
   /* Canonical TensorFlow ops are namespaced using forward slashes.
    * This fallback categorizer categorizes by the top-level namespace.
    */
-  export var topLevelNamespaceCategorizer: Categorizer = splitCategorizer(/\//);
-
   // Try to produce good categorizations on legacy graphs, which often
   // are namespaced like l1_foo/bar or l2_baz/bam.
   // If there is no leading underscore before the first forward slash,
   // then it behaves the same as topLevelNamespaceCategorizer
-  export var legacyUnderscoreCategorizer: Categorizer =
-      splitCategorizer(/[\/_]/);
-
-  export function fallbackCategorizer(s: string): Categorizer {
-    switch (s) {
-      case 'TopLevelNamespaceCategorizer':
-        return topLevelNamespaceCategorizer;
-      case 'LegacyUnderscoreCategorizer':
-        return legacyUnderscoreCategorizer;
-      default:
-        throw new Error('Unrecognized categorization strategy: ' + s);
-    }
-  }
+  export var rootNameUnderscoreCategorizer = rootNameCategorizer(/[\/_]/);
 
   /* An 'extractor' is a function that takes a tag name, and 'extracts' a
    * category name.
@@ -81,14 +67,14 @@ module Categorizer {
       if (tags.length === 0) {
         return [];
       }
-      var sortedTags = tags.slice().sort(VZ.Sorting.compareTagNames);
-      var categories: Category[] = [];
-      var currentCategory = {
+      let sortedTags = tags.slice().sort(VZ.Sorting.compareTagNames);
+      let categories: Category[] = [];
+      let currentCategory = {
         name: extractor(sortedTags[0]),
         tags: [],
       };
       sortedTags.forEach((t: string) => {
-        var topLevel = extractor(t);
+        let topLevel = extractor(t);
         if (currentCategory.name !== topLevel) {
           categories.push(currentCategory);
           currentCategory = {
@@ -103,48 +89,77 @@ module Categorizer {
     };
   }
 
-  function splitCategorizer(r: RegExp): Categorizer {
-    var extractor = (t: string) => {
-      return t.split(r)[0];
-    };
+  /** Split on a regex, taking just the first element after splitting.
+   * It's like getting the root directory. E.g. if you split on slash, then
+   * 'foo/bar/zod' will go to 'foo'
+   */
+  function rootNameCategorizer(r: RegExp): Categorizer {
+    let extractor = (t: string) => { return t.split(r)[0]; };
     return extractorToCategorizer(extractor);
   }
 
+  /* Split on forward slashes, taking all the prefix until the last split.
+   * It's like getting the dirname of a path, e.g. 'foo/bar/zod' will go to
+   * 'foo/bar'.
+   * In the case where there are no splits (e.g. 'foo') then it uses 'foo' as
+   * the category name.
+   */
+  function dnameExtractor(t: string) {
+    let splits = t.split('/');
+    if (splits.length === 1) {
+      return t;
+    } else {
+      let last = _.last(splits);
+      return t.slice(0, t.length - last.length - 1);
+    }
+  }
+
+  export var directoryNameCategorizer = extractorToCategorizer(dnameExtractor);
+
   export interface CategoryDefinition {
     name: string;
     matches: (t: string) => boolean;
   }
 
   export function defineCategory(ruledef: string): CategoryDefinition {
-    var r = new RegExp(ruledef);
-    var f = function(tag: string): boolean {
-      return r.test(tag);
-    };
+    let r = new RegExp(ruledef);
+    let f = function(tag: string): boolean { return r.test(tag); };
     return { name: ruledef, matches: f };
   }
 
   export function _categorizer(
       rules: CategoryDefinition[], fallback: Categorizer) {
     return function(tags: string[]): Category[] {
-      var remaining: d3.Set = d3.set(tags);
-      var userSpecified = rules.map((def: CategoryDefinition) => {
-        var tags: string[] = [];
+      let remaining: d3.Set = d3.set(tags);
+      let userSpecified = rules.map((def: CategoryDefinition) => {
+        let tags: string[] = [];
         remaining.forEach((t: string) => {
           if (def.matches(t)) {
             tags.push(t);
           }
         });
-        var cat = {name: def.name, tags: tags.sort(VZ.Sorting.compareTagNames)};
+        let cat = {name: def.name, tags: tags.sort(VZ.Sorting.compareTagNames)};
         return cat;
       });
-      var defaultCategories = fallback(remaining.values());
+      let defaultCategories = fallback(remaining.values());
       return userSpecified.concat(defaultCategories);
     };
   }
 
+  export function fallbackCategorizer(s: string): Categorizer {
+    switch (s) {
+      case 'DirectoryNameCategorizer':
+        return directoryNameCategorizer;
+      case 'RootNameUnderscoreCategorizer':
+        return rootNameUnderscoreCategorizer;
+      default:
+        throw new Error('Unrecognized categorization strategy: ' + s);
+    }
+  }
+
   export function categorizer(s: CustomCategorization): Categorizer {
-    var rules = s.categoryDefinitions.map(defineCategory);
-    var fallback = fallbackCategorizer(s.fallbackCategorizer);
+    let rules = s.categoryDefinitions.map(defineCategory);
+    let fallback = fallbackCategorizer(s.fallbackCategorizer);
     return _categorizer(rules, fallback);
   };
 }
diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/dashboard-style.html b/tensorflow/tensorboard/components/tf-dashboard-common/dashboard-style.html
index 2126015b135..b6225ba5b23 100644
--- a/tensorflow/tensorboard/components/tf-dashboard-common/dashboard-style.html
+++ b/tensorflow/tensorboard/components/tf-dashboard-common/dashboard-style.html
@@ -21,10 +21,6 @@ limitations under the License.
 <dom-module id="dashboard-style">
   <template>
     <style>
-      #content-container{
-        display: block;
-      }
-
       .sidebar {
         display: flex;
         flex-direction: column;
@@ -32,36 +28,6 @@ limitations under the License.
         margin-right: 20px;
       }
 
-      #categorizer {
-        flex-shrink: 0;
-      }
-
-      #xTypeSelector {
-        flex-shrink: 0;
-        margin: 20px 0;
-      }
-
-      #runSelector {
-        flex-shrink: 1;
-        flex-grow: 1;
-      }
-
-      #tooltip-sorting {
-        display: flex;
-        font-size: 14px;
-      }
-
-      #tooltip-sorting-label {
-        margin-top: 13px;
-        margin-left: 28px;
-      }
-
-      #tooltip-sorting paper-dropdown-menu {
-        margin-left: 10px;
-        --paper-input-container-focus-color: var(--tb-orange-strong);
-        width: 105px;
-      }
-
       .sidebar-section {
         border-top: solid 1px rgba(0, 0, 0, 0.12);
         padding: 20px 0px 20px 30px;
@@ -80,6 +46,7 @@ limitations under the License.
         --paper-checkbox-checked-color: var(--tb-ui-dark-accent);
         --paper-checkbox-unchecked-color: var(--tb-ui-dark-accent);
         font-size: 14px;
+        margin-top: 5px;
       }
     </style>
   </template>
diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/test/categorizerTest.ts b/tensorflow/tensorboard/components/tf-dashboard-common/test/categorizerTest.ts
index 747fe7e71a3..43ff5f8850e 100644
--- a/tensorflow/tensorboard/components/tf-dashboard-common/test/categorizerTest.ts
+++ b/tensorflow/tensorboard/components/tf-dashboard-common/test/categorizerTest.ts
@@ -12,30 +12,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-var assert = chai.assert;
 
 module Categorizer {
   describe('categorizer', () => {
-    describe('topLevelNamespaceCategorizer', () => {
+    describe('directoryNameCategorizer', () => {
       it('returns empty array on empty tags',
-         () => { assert.lengthOf(topLevelNamespaceCategorizer([]), 0); });
+         () => { assert.lengthOf(directoryNameCategorizer([]), 0); });
 
       it('handles a simple case', () => {
-        var simple = [
+        let simple = [
           'foo1/bar', 'foo1/zod', 'foo2/bar', 'foo2/zod', 'gosh/lod/mar',
           'gosh/lod/ned'
         ];
-        var expected = [
+        let expected = [
           {name: 'foo1', tags: ['foo1/bar', 'foo1/zod']},
           {name: 'foo2', tags: ['foo2/bar', 'foo2/zod']},
-          {name: 'gosh', tags: ['gosh/lod/mar', 'gosh/lod/ned']},
+          {name: 'gosh/lod', tags: ['gosh/lod/mar', 'gosh/lod/ned']},
         ];
-        assert.deepEqual(topLevelNamespaceCategorizer(simple), expected);
+        assert.deepEqual(directoryNameCategorizer(simple), expected);
       });
 
       it('orders the categories', () => {
-        var test = ['e', 'f', 'g', 'a', 'b', 'c'];
-        var expected = [
+        let test = ['e', 'f', 'g', 'a', 'b', 'c'];
+        let expected = [
           {name: 'a', tags: ['a']},
           {name: 'b', tags: ['b']},
           {name: 'c', tags: ['c']},
@@ -43,13 +42,13 @@ module Categorizer {
           {name: 'f', tags: ['f']},
           {name: 'g', tags: ['g']},
         ];
-        assert.deepEqual(topLevelNamespaceCategorizer(test), expected);
+        assert.deepEqual(directoryNameCategorizer(test), expected);
       });
 
       it('handles cases where category names overlap node names', () => {
-        var test = ['a', 'a/a', 'a/b', 'a/c', 'b', 'b/a'];
-        var actual = topLevelNamespaceCategorizer(test);
-        var expected = [
+        let test = ['a', 'a/a', 'a/b', 'a/c', 'b', 'b/a'];
+        let actual = directoryNameCategorizer(test);
+        let expected = [
           {name: 'a', tags: ['a', 'a/a', 'a/b', 'a/c']},
           {name: 'b', tags: ['b', 'b/a']},
         ];
@@ -58,18 +57,39 @@ module Categorizer {
 
       it('handles singleton case', () => {
         assert.deepEqual(
-            topLevelNamespaceCategorizer(['a']), [{name: 'a', tags: ['a']}]);
+            directoryNameCategorizer(['a']), [{name: 'a', tags: ['a']}]);
+      });
+
+      it('splits on bottom level name', () => {
+        let example = [
+          'foo1/bar',
+          'foo1/zod',
+          'foo2/bar',
+          'foo2/zod',
+          'gosh/lod/mar',
+          'gosh/lod/ned',
+          'gosh/zod/mar',
+          'gosh/zod/ned/y',
+        ];
+        let expected = [
+          {name: 'foo1', tags: ['foo1/bar', 'foo1/zod']},
+          {name: 'foo2', tags: ['foo2/bar', 'foo2/zod']},
+          {name: 'gosh/lod', tags: ['gosh/lod/mar', 'gosh/lod/ned']},
+          {name: 'gosh/zod', tags: ['gosh/zod/mar']},
+          {name: 'gosh/zod/ned', tags: ['gosh/zod/ned/y']},
+        ];
+        assert.deepEqual(directoryNameCategorizer(example), expected);
       });
     });
 
-    describe('legacyUnderscoreCategorizer', () => {
+    describe('RootNameUnderscoreCategorizer', () => {
       it('splits by shorter of first _ or /', () => {
-        var tags = [
+        let tags = [
           'l0_bar/foo', 'l0_bar/baz', 'l0_foo/wob', 'l1_zoink/bla',
           'l1_wibble/woz', 'l1/foo_woink', 'l2/wozzle_wizzle'
         ];
-        var actual = legacyUnderscoreCategorizer(tags);
-        var expected = [
+        let actual = rootNameUnderscoreCategorizer(tags);
+        let expected = [
           {name: 'l0', tags: ['l0_bar/baz', 'l0_bar/foo', 'l0_foo/wob']},
           {name: 'l1', tags: ['l1/foo_woink', 'l1_wibble/woz', 'l1_zoink/bla']},
           {name: 'l2', tags: ['l2/wozzle_wizzle']},
@@ -85,15 +105,15 @@ module Categorizer {
 
       function testCategorizer(
           defs: string[], fallback: Categorizer, tags: string[]): Category[] {
-        var catDefs = defs.map(defineCategory);
+        let catDefs = defs.map(defineCategory);
         return _categorizer(catDefs, fallback)(tags);
       }
 
       it('categorizes by regular expression', () => {
-        var defs = ['foo..', 'bar..'];
-        var tags = ['fooab', 'fooxa', 'barts', 'barms'];
-        var actual = testCategorizer(defs, noFallbackCategorizer, tags);
-        var expected = [
+        let defs = ['foo..', 'bar..'];
+        let tags = ['fooab', 'fooxa', 'barts', 'barms'];
+        let actual = testCategorizer(defs, noFallbackCategorizer, tags);
+        let expected = [
           {name: 'foo..', tags: ['fooab', 'fooxa']},
           {name: 'bar..', tags: ['barms', 'barts']},
         ];
@@ -101,10 +121,10 @@ module Categorizer {
       });
 
       it('matches non-exclusively', () => {
-        var tags = ['abc', 'bar', 'zod'];
-        var actual =
+        let tags = ['abc', 'bar', 'zod'];
+        let actual =
             testCategorizer(['...', 'bar'], noFallbackCategorizer, tags);
-        var expected = [
+        let expected = [
           {name: '...', tags: ['abc', 'bar', 'zod']},
           {name: 'bar', tags: ['bar']},
         ];
@@ -112,9 +132,9 @@ module Categorizer {
       });
 
       it('creates categories for unmatched rules', () => {
-        var actual =
+        let actual =
             testCategorizer(['a', 'b', 'c'], noFallbackCategorizer, []);
-        var expected = [
+        let expected = [
           {name: 'a', tags: []},
           {name: 'b', tags: []},
           {name: 'c', tags: []},
@@ -123,10 +143,10 @@ module Categorizer {
       });
 
       it('category regexs work with special characters', () => {
-        var defs = ['^\\w+$', '^\\d+$', '^\\/..$'];
-        var tags = ['foo', '3243', '/xa'];
-        var actual = testCategorizer(defs, noFallbackCategorizer, tags);
-        var expected = [
+        let defs = ['^\\w+$', '^\\d+$', '^\\/..$'];
+        let tags = ['foo', '3243', '/xa'];
+        let actual = testCategorizer(defs, noFallbackCategorizer, tags);
+        let expected = [
           {name: '^\\w+$', tags: ['3243', 'foo']},
           {name: '^\\d+$', tags: ['3243']},
           {name: '^\\/..$', tags: ['/xa']},
@@ -135,20 +155,20 @@ module Categorizer {
       });
 
       it('category tags are sorted', () => {
-        var tags = ['a', 'z', 'c', 'd', 'e', 'x', 'f', 'y', 'g'];
-        var sorted = tags.slice().sort();
-        var expected = [{name: '.*', tags: sorted}];
-        var actual = testCategorizer(['.*'], noFallbackCategorizer, tags);
+        let tags = ['a', 'z', 'c', 'd', 'e', 'x', 'f', 'y', 'g'];
+        let sorted = tags.slice().sort();
+        let expected = [{name: '.*', tags: sorted}];
+        let actual = testCategorizer(['.*'], noFallbackCategorizer, tags);
         assert.deepEqual(actual, expected);
       });
 
       it('if nonexclusive: all tags passed to fallback', () => {
-        var passedToDefault = null;
+        let passedToDefault = null;
         function defaultCategorizer(tags: string[]): Category[] {
           passedToDefault = tags;
           return [];
         }
-        var tags = ['foo', 'bar', 'foo123'];
+        let tags = ['foo', 'bar', 'foo123'];
         testCategorizer(['foo'], defaultCategorizer, tags);
         assert.deepEqual(passedToDefault, tags);
       });
diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html b/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html
index 4b588f63231..db17bd4dab6 100644
--- a/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html
+++ b/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html
@@ -48,8 +48,8 @@ categories are exclusive.
     </div>
     <div id="underscore-categorization">
       <paper-checkbox
-        checked="{{splitOnUnderscore}}"
-      >Split on underscores</paper-checkbox>
+        checked="{{useLegacyCategorizer}}"
+      >Use Legacy Categorizer</paper-checkbox>
     </div>
     <style>
       :host {
@@ -76,14 +76,14 @@ categories are exclusive.
         categoriesAreExclusive: {type: Boolean, value: true},
         fallbackCategorizer: {
           type: String,
-          computed: "chooseFallbackCategorizer(splitOnUnderscore)"
+          computed: "chooseFallbackCategorizer(useLegacyCategorizer)"
         },
-        splitOnUnderscore: {
+        useLegacyCategorizer: {
           type: Boolean,
           notify: true,
-          value: TF.URIStorage.getBooleanInitializer('splitOnUnderscore',
+          value: TF.URIStorage.getBooleanInitializer('useLegacyCategorizer',
               false),
-          observer: '_splitOnUnderscoreObserver'
+          observer: '_useLegacyCategorizerObserver'
         },
         categorizer: {
           type: Object,
@@ -106,15 +106,15 @@ categories are exclusive.
           this._setCategories(categories);
         })
       },
-      chooseFallbackCategorizer: function(splitOnUnderscore) {
-        if (splitOnUnderscore) {
-          return "LegacyUnderscoreCategorizer";
+      chooseFallbackCategorizer: function(useLegacyCategorizer) {
+        if (useLegacyCategorizer) {
+          return "RootNameUnderscoreCategorizer";
         } else {
-          return "TopLevelNamespaceCategorizer";
+          return "DirectoryNameCategorizer";
         }
       },
-      _splitOnUnderscoreObserver: TF.URIStorage.getBooleanObserver(
-          'splitOnUnderscore', false)
+      _useLegacyCategorizerObserver: TF.URIStorage.getBooleanObserver(
+          'useLegacyCategorizer', false)
     });
   </script>
 </dom-module>
diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/tf-panes-helper.html b/tensorflow/tensorboard/components/tf-dashboard-common/tf-panes-helper.html
index 7ab8254a57e..6c5d680f7b8 100644
--- a/tensorflow/tensorboard/components/tf-dashboard-common/tf-panes-helper.html
+++ b/tensorflow/tensorboard/components/tf-dashboard-common/tf-panes-helper.html
@@ -69,10 +69,12 @@ downloadLinkUrlFunction property to an appropriate value.
         <div class="layout horizontal wrap">
           <template is="dom-repeat" items="[[_categoryCards(category, selectedRuns.*, run2tag.*)]]">
               <div class="card">
-                <span class="card-title">[[item.tag]]</span>
-                <template is="dom-if" if="[[repeatForRuns]]">
-                  <span class="card-subtitle">[[item.run]]</span>
-                </template>
+                <div class="card-title-container" style="border-color: [[_titleBorderColor(item.run)]]">
+                  <span class="card-title">[[item.tag]]</span>
+                  <template is="dom-if" if="[[repeatForRuns]]">
+                    <span class="card-subtitle">[[item.run]]</span>
+                  </template>
+                </div>
                 <div class="card-content">
                   <tf-chart-scaffold
                     tag="[[item.tag]]"
@@ -111,7 +113,7 @@ downloadLinkUrlFunction property to an appropriate value.
         display: flex;
         flex-direction: column;
         margin: 5px;
-        padding: 0 30px 30px 0;
+        padding: 0 30px 35px 0;
         -webkit-user-select: none;
         -moz-user-select: none;
         position: relative;
@@ -130,6 +132,10 @@ downloadLinkUrlFunction property to an appropriate value.
         overflow: hidden;
       }
 
+      .card-title {
+        display: block;
+      }
+
       .card-subtitle {
         font-size: 12px;
       }
@@ -152,6 +158,11 @@ downloadLinkUrlFunction property to an appropriate value.
         pointer-events: none;
       }
 
+      .card-title-container {
+        border-left: 4px solid;
+        padding-left: 5px;
+      }
+
       .expand-button {
         color: #2196F3;
         width: 32px;
@@ -183,6 +194,11 @@ downloadLinkUrlFunction property to an appropriate value.
          */
         categories: Array,
 
+        /**
+         * Input of the colors that are used for the user's runs.
+         */
+        colorScale: Object,
+
         /**
          * The name of the data type that is used by this dashboard. This will
          * be used to display what is missing when there is no data available.
@@ -309,6 +325,9 @@ downloadLinkUrlFunction property to an appropriate value.
 
         return cards;
       },
+      _titleBorderColor: function(run) {
+        return this.repeatForRuns ? this.colorScale.scale(run) : 'white';
+      },
 
       /*
        * Polymer data binding forwarding functions. Check the
diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/tf-sidebar-helper.html b/tensorflow/tensorboard/components/tf-dashboard-common/tf-sidebar-helper.html
new file mode 100644
index 00000000000..e2e43fb084d
--- /dev/null
+++ b/tensorflow/tensorboard/components/tf-dashboard-common/tf-sidebar-helper.html
@@ -0,0 +1,159 @@
+<!--
+@license
+Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+<link rel="import" href="../polymer/polymer.html">
+<link rel="import" href="tf-categorizer.html">
+<link rel="import" href="tf-run-selector.html">
+
+<!--
+tf-sidebar-helper is a component that renders a sidebar for configuration
+components, like the tf-categorizer and the tf-run-selector. The component can
+also be extended with more options useful to the dashboards.
+
+To use it, create the tf-sidebar-helper with the required properties. To extend
+it with extra configuration components, add them as children of the element:
+
+<tf-sidebar-helper
+  categories="{{outputCategories}}"
+  color-scale="[[colorScale]]"
+  run2tag="[[run2tag]]"
+  runs="[[runs]]"
+  selected-runs="{{outSelectedRuns}}"
+  >
+  <div class="extend-first-section">
+    <my options>
+  </div>
+  <div class="sidebar-section">
+    <my options>
+  </div>
+  ...
+</tf-sidebar-helper>
+
+Elements inside the .extend-first-section div will be put on the first section
+of the sidebar, while the rest of the divs will be put after it and before the
+tf-run-selector.
+
+@element tf-sidebar-helper
+-->
+<dom-module id="tf-sidebar-helper">
+  <template>
+    <div class="sidebar-section">
+      <tf-categorizer
+        id="categorizer"
+        tags="[[_visibleTags]]"
+        categories="{{categories}}"
+        ></tf-categorizer>
+      <content select=".extend-first-section"></content>
+    </div>
+    <content></content>
+    <div class="sidebar-section">
+      <tf-run-selector
+        id="runSelector"
+        runs="[[runs]]"
+        color-scale="[[colorScale]]"
+        out-selected="{{selectedRuns}}"
+        ></tf-run-selector>
+    </div>
+    <style include="dashboard-style"></style>
+    <style>
+      :host {
+        display: flex;
+        flex-direction: column;
+        height: 100%;
+      }
+
+      #categorizer {
+        flex-shrink: 0;
+      }
+
+      #runSelector {
+        flex-shrink: 1;
+        flex-grow: 1;
+      }
+
+      .sidebar-section {
+        border-top: solid 1px rgba(0, 0, 0, 0.12);
+        padding: 20px 0px 20px 30px;
+      }
+
+      .sidebar-section:first-child {
+        border: none;
+      }
+
+      .sidebar-section:last-child {
+        flex-grow: 1;
+        display: flex;
+      }
+
+      paper-checkbox {
+        --paper-checkbox-checked-color: var(--tb-ui-dark-accent);
+        --paper-checkbox-unchecked-color: var(--tb-ui-dark-accent);
+        font-size: 14px;
+      }
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-sidebar-helper",
+      properties: {
+        /**
+         * This is an output of the categories that the user selected to
+         * separate the different tags. Each category here should be given its
+         * own collapsible pane.
+         */
+        categories: {
+          type: Array,
+          notify: true,
+        },
+
+        /**
+         * Input of the colors that are used for the user's runs.
+         */
+        colorScale: Object,
+
+        /**
+         * Map from each run to the tags that are available for that run.
+         */
+        run2tag: Object,
+
+        /**
+         * Input of all valid runs that can be selected by the user.
+         */
+        runs: Array,
+
+        /**
+         * Outputs an array with the runs that are selected by the user (i.e.
+         * valid to be displayed).
+         */
+        selectedRuns: {
+          type: Array,
+          notify: true,
+        },
+
+        _visibleTags: {
+          type: Array,
+          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
+        },
+      },
+      _getVisibleTags: function() {  // Union of the tags of all selected runs.
+        var keys = this.selectedRuns;
+        var dict = this.run2tag;
+        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
+      },
+    })
+  </script>
+</dom-module>
diff --git a/tensorflow/tensorboard/components/tf-distribution-dashboard/tf-distribution-dashboard.html b/tensorflow/tensorboard/components/tf-distribution-dashboard/tf-distribution-dashboard.html
index bd3447ede86..6500271c876 100644
--- a/tensorflow/tensorboard/components/tf-distribution-dashboard/tf-distribution-dashboard.html
+++ b/tensorflow/tensorboard/components/tf-distribution-dashboard/tf-distribution-dashboard.html
@@ -18,12 +18,10 @@ limitations under the License.
 <link rel="import" href="../polymer/polymer.html">
 <link rel="import" href="../tf-backend/tf-backend.html">
 <link rel="import" href="../tf-color-scale/tf-color-scale.html">
-<link rel="import" href="../tf-dashboard-common/tf-categorizer.html">
-<link rel="import" href="../tf-dashboard-common/tf-collapsable-pane.html">
 <link rel="import" href="../tf-dashboard-common/tf-dashboard.html">
 <link rel="import" href="../tf-dashboard-common/tf-option-selector.html">
-<link rel="import" href="../tf-dashboard-common/tf-run-selector.html">
 <link rel="import" href="../tf-dashboard-common/tf-panes-helper.html">
+<link rel="import" href="../tf-dashboard-common/tf-sidebar-helper.html">
 <link rel="import" href="../tf-imports/lodash.html">
 <link rel="import" href="tf-distribution-chart.html">
 <link rel="import" href="../iron-collapse/iron-collapse.html">
@@ -33,17 +31,16 @@ limitations under the License.
 tf-distribution-dashboard is a complete frontend that loads runs from a backend,
 and creates chart panes that display data for those runs.
 
-It provides a categorizer, run selector, and x type selector, by which the user
-can customize how data is organized and displayed.
+It provides an x type selector and the normal tf-sidebar-helper options, by
+which the user can customize how data is organized and displayed.
 
-Each chart has a button that can toggle whether it is "selected"; selectedRuns
+Each chart has a button that can toggle whether it is "expanded"; expanded
 charts are larger.
 
 Organizationally, the #plumbing div contains components that have no concrete
-manifestation and just effect data bindings or data loading. The #sidebar contains
-shared controls like the tf-categorizer, tf-run-selector, and tf-x-type-selector.
-The #center div contains tf-distribution-charts embedded inside
-tf-collapsable-panes.
+manifestation and just effect data bindings or data loading. The .sidebar div
+contains shared controls provided by tf-sidebar-helper. The .center div
+contains tf-distribution-charts embedded inside a tf-panes-helper.
 -->
 <dom-module id="tf-distribution-dashboard">
   <template>
@@ -51,19 +48,19 @@ tf-collapsable-panes.
       <tf-color-scale
         id="colorScale"
         runs="[[runs]]"
-        out-color-scale="{{colorScale}}"
+        out-color-scale="{{_colorScale}}"
       ></tf-color-scale>
     </div>
 
     <tf-dashboard-layout>
       <div class="sidebar">
-        <div class="sidebar-section">
-          <tf-categorizer
-            id="categorizer"
-            tags="[[_visibleTags]]"
-            categories="{{categories}}"
-          ></tf-categorizer>
-        </div>
+        <tf-sidebar-helper
+          categories="{{_categories}}"
+          color-scale="[[_colorScale]]"
+          run2tag="[[run2tag]]"
+          runs="[[runs]]"
+          selected-runs="{{_selectedRuns}}"
+          >
         <div class="sidebar-section">
           <tf-option-selector
             id="xTypeSelector"
@@ -75,29 +72,23 @@ tf-collapsable-panes.
             <paper-button id="wall_time">wall</paper-button>
           </tf-option-selector>
         </div>
-        <div class="sidebar-section">
-          <tf-run-selector
-            id="runSelector"
-            runs="[[runs]]"
-            color-scale="[[colorScale]]"
-            out-selected="{{selectedRuns}}"
-          ></tf-run-selector>
-          </div>
+        </tf-sidebar-helper>
       </div>
 
       <div class="center">
         <tf-panes-helper
-          categories="[[categories]]"
+          categories="[[_categories]]"
+          color-scale="[[_colorScale]]"
           data-type="[[dataType]]"
           data-provider="[[dataProvider]]"
           run2tag="[[run2tag]]"
-          selected-runs="[[selectedRuns]]"
+          selected-runs="[[_selectedRuns]]"
           repeat-for-runs
           >
           <template>
             <tf-distribution-chart
               x-type="[[_xType]]"
-              color-scale="[[colorScale]]"
+              color-scale="[[_colorScale]]"
               ></tf-distribution-chart>
           </template>
         </tf-panes-helper>
@@ -115,21 +106,12 @@ tf-collapsable-panes.
         TF.Backend.Behavior,
       ],
       properties: {
-        _visibleTags: {
-          type: Array,
-          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
-        },
         _xType: {
           type: String,
           value: "step"
         },
         dataType: {value: "compressedHistogram"},
       },
-      _getVisibleTags: function() {
-        var keys = this.selectedRuns;
-        var dict = this.run2tag;
-        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
-      },
     });
   </script>
 </dom-module>
diff --git a/tensorflow/tensorboard/components/tf-event-dashboard/tf-event-dashboard.html b/tensorflow/tensorboard/components/tf-event-dashboard/tf-event-dashboard.html
index 835ec5b04b8..9ebae5dfe60 100644
--- a/tensorflow/tensorboard/components/tf-event-dashboard/tf-event-dashboard.html
+++ b/tensorflow/tensorboard/components/tf-event-dashboard/tf-event-dashboard.html
@@ -19,13 +19,10 @@ limitations under the License.
 <link rel="import" href="tf-smoothing-input.html">
 <link rel="import" href="../tf-backend/tf-backend.html">
 <link rel="import" href="../tf-color-scale/tf-color-scale.html">
-<link rel="import" href="../tf-dashboard-common/tf-categorizer.html">
-<link rel="import" href="../tf-dashboard-common/tf-chart-scaffold.html">
-<link rel="import" href="../tf-dashboard-common/tf-collapsable-pane.html">
 <link rel="import" href="../tf-dashboard-common/tf-dashboard.html">
 <link rel="import" href="../tf-dashboard-common/tf-option-selector.html">
-<link rel="import" href="../tf-dashboard-common/tf-run-selector.html">
 <link rel="import" href="../tf-dashboard-common/tf-panes-helper.html">
+<link rel="import" href="../tf-dashboard-common/tf-sidebar-helper.html">
 <link rel="import" href="../tf-imports/lodash.html">
 <link rel="import" href="../vz-line-chart/vz-line-chart.html">
 <link rel="import" href="../iron-collapse/iron-collapse.html">
@@ -41,13 +38,13 @@ and creates chart panes that display data for those runs.
 It provides a categorizer, run selector, and x type selector, by which the user
 can customize how data is organized and displayed.
 
-Each chart has a button that can toggle whether it is "selected"; selectedRuns
+Each chart has a button that can toggle whether it is "expanded"; expanded
 charts are larger.
 
 Organizationally, the #plumbing div contains components that have no concrete
-manifestation and just effect data bindings or data loading. The #sidebar contains
-shared controls like the tf-categorizer, tf-run-selector, and tf-x-type-selector.
-The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
+manifestation and just effect data bindings or data loading. The .sidebar div
+contains shared controls provided by tf-sidebar-helper. The .center div
+contains vz-line-charts embedded inside a tf-panes-helper.
 -->
 <dom-module id="tf-event-dashboard">
   <template>
@@ -55,78 +52,74 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
       <tf-color-scale
         id="colorScale"
         runs="[[runs]]"
-        out-color-scale="{{colorScale}}"
+        out-color-scale="{{_colorScale}}"
       ></tf-color-scale>
     </div>
 
     <tf-dashboard-layout>
       <div class="sidebar">
-        <div class="sidebar-section">
-          <tf-categorizer
-            id="categorizer"
-            tags="[[_visibleTags]]"
-            categories="{{categories}}"
-          ></tf-categorizer>
-          <paper-checkbox
-            id="download-option"
-            checked="{{_showDownloadLinks}}"
-          >Data download links</paper-checkbox>
-          <div id="tooltip-sorting">
-            <div id="tooltip-sorting-label">Tooltip sorting method:</div>
-            <paper-dropdown-menu
-              no-label-float
-              selected-item-label="{{_tooltipSortingMethod}}"
-            >
-              <paper-menu class="dropdown-content" selected="0">
-                <paper-item>default</paper-item>
-                <paper-item>descending</paper-item>
-                <paper-item>ascending</paper-item>
-              </paper-menu>
-            </paper-dropdown-menu>
+        <tf-sidebar-helper
+          categories="{{_categories}}"
+          color-scale="[[_colorScale]]"
+          run2tag="[[run2tag]]"
+          runs="[[runs]]"
+          selected-runs="{{_selectedRuns}}"
+          >
+          <div class="extend-first-section">
+            <paper-checkbox
+              id="download-option"
+              checked="{{_showDownloadLinks}}"
+              >Data download links</paper-checkbox>
+            <div id="tooltip-sorting">
+              <div id="tooltip-sorting-label">Tooltip sorting method:</div>
+              <paper-dropdown-menu
+                no-label-float
+                selected-item-label="{{_tooltipSortingMethod}}"
+                >
+                <paper-menu class="dropdown-content" selected="0">
+                  <paper-item>default</paper-item>
+                  <paper-item>descending</paper-item>
+                  <paper-item>ascending</paper-item>
+                </paper-menu>
+              </paper-dropdown-menu>
+            </div>
           </div>
-        </div>
-        <div class="sidebar-section">
-          <tf-smoothing-input
-            weight="{{_smoothingWeight}}"
-            step="0.001"
-            min="0"
-            max="1"
-            ></tf-smoothing-input>
-        </div>
-        <div class="sidebar-section">
-          <tf-option-selector
-            id="xTypeSelector"
-            name="Horizontal Axis"
-            selected-id="{{_xType}}"
-            >
-            <paper-button id="step">step</paper-button>
-            <paper-button id="relative">relative</paper-button>
-            <paper-button id="wall_time">wall</paper-button>
-          </tf-option-selector>
-        </div>
-        <div class="sidebar-section">
-          <tf-run-selector
-            id="runSelector"
-            runs="[[runs]]"
-            color-scale="[[colorScale]]"
-            out-selected="{{selectedRuns}}"
-          ></tf-run-selector>
-        </div>
+          <div class="sidebar-section">
+            <tf-smoothing-input
+              weight="{{_smoothingWeight}}"
+              step="0.001"
+              min="0"
+              max="1"
+              ></tf-smoothing-input>
+          </div>
+          <div class="sidebar-section">
+            <tf-option-selector
+              id="xTypeSelector"
+              name="Horizontal Axis"
+              selected-id="{{_xType}}"
+              >
+              <paper-button id="step">step</paper-button>
+              <paper-button id="relative">relative</paper-button>
+              <paper-button id="wall_time">wall</paper-button>
+            </tf-option-selector>
+          </div>
+        </tf-sidebar-helper>
       </div>
       <div class="center">
         <tf-panes-helper
-          categories="[[categories]]"
+          categories="[[_categories]]"
+          color-scale="[[_colorScale]]"
           data-type="[[dataType]]"
           data-provider="[[dataProvider]]"
           run2tag="[[run2tag]]"
-          selected-runs="[[selectedRuns]]"
+          selected-runs="[[_selectedRuns]]"
           show-download-links="[[_showDownloadLinks]]"
           download-link-url-function="[[scalarUrl]]"
           >
           <template>
             <vz-line-chart
               x-type="[[_xType]]"
-              color-scale="[[colorScale]]"
+              color-scale="[[_colorScale]]"
               smoothing-enabled="[[_smoothingEnabled]]"
               smoothing-weight="[[_smoothingWeight]]"
               tooltip-sorting-method="[[_tooltipSortingMethod]]"
@@ -147,7 +140,7 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
       .log-button {
         position: absolute;
         left: 35px;
-        bottom: -30px;
+        bottom: -35px;
         color: #2196F3;
         background: #fff;
         width: 32px;
@@ -159,6 +152,23 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
       .log-button-selected {
         background: var(--tb-ui-light-accent);
       }
+
+      #tooltip-sorting {
+        display: flex;
+        font-size: 14px;
+        margin-top: 5px;
+      }
+
+      #tooltip-sorting-label {
+        margin-top: 13px;
+        margin-left: 28px;
+      }
+
+      #tooltip-sorting paper-dropdown-menu {
+        margin-left: 10px;
+        --paper-input-container-focus-color: var(--tb-orange-strong);
+        width: 105px;
+      }
     </style>
 
   </template>
@@ -180,10 +190,6 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
           type: Function,
           computed: "_getScalarUrl(router)"
         },
-        _visibleTags: {
-          type: Array,
-          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
-        },
         _showDownloadLinks: {
           type: Boolean,
           notify: true,
@@ -201,10 +207,6 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
           type: Boolean,
           computed: '_computeSmoothingEnabled(_smoothingWeight)'
         },
-        colorScale: {
-          type: Object,
-          notify: true,
-        },
         _xType: {
           type: String,
           value: "step"
@@ -218,11 +220,6 @@ The #center div contains tf-line-charts embedded inside tf-collapsable-panes.
       _getScalarUrl: function() {
         return this.router.scalars;
       },
-      _getVisibleTags: function() {
-        var keys = this.selectedRuns;
-        var dict = this.run2tag;
-        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
-      },
       _showDownloadLinksObserver: TF.URIStorage.getBooleanObserver(
           '_showDownloadLinks', false),
       _smoothingWeightObserver: TF.URIStorage.getNumberObserver(
diff --git a/tensorflow/tensorboard/components/tf-graph-common/lib/scene/minimap.ts b/tensorflow/tensorboard/components/tf-graph-common/lib/scene/minimap.ts
index f5217ab8760..769984feb4a 100644
--- a/tensorflow/tensorboard/components/tf-graph-common/lib/scene/minimap.ts
+++ b/tensorflow/tensorboard/components/tf-graph-common/lib/scene/minimap.ts
@@ -271,7 +271,8 @@ export class Minimap {
       let blob = new Blob([svgXml], {type: 'image/svg+xml;charset=utf-8'});
       image.src = URL.createObjectURL(blob);
     };
-    image.src = 'data:image/svg+xml;charset=utf-8,' + encodeURIComponent(svgXml);
+    image.src =
+        'data:image/svg+xml;charset=utf-8,' + encodeURIComponent(svgXml);
   }
 
   /**
diff --git a/tensorflow/tensorboard/components/tf-histogram-dashboard/tf-histogram-dashboard.html b/tensorflow/tensorboard/components/tf-histogram-dashboard/tf-histogram-dashboard.html
index 77c13ed3239..ff6d3fd68e1 100644
--- a/tensorflow/tensorboard/components/tf-histogram-dashboard/tf-histogram-dashboard.html
+++ b/tensorflow/tensorboard/components/tf-histogram-dashboard/tf-histogram-dashboard.html
@@ -22,7 +22,7 @@ limitations under the License.
 <link rel="import" href="../tf-dashboard-common/tf-dashboard.html">
 <link rel="import" href="../tf-dashboard-common/tf-panes-helper.html">
 <link rel="import" href="../tf-dashboard-common/tf-option-selector.html">
-<link rel="import" href="../tf-dashboard-common/tf-run-selector.html">
+<link rel="import" href="../tf-dashboard-common/tf-sidebar-helper.html">
 <link rel="import" href="../tf-imports/lodash.html">
 <link rel="import" href="../vz-histogram-timeseries/vz-histogram-timeseries.html">
 <link rel="import" href="../iron-collapse/iron-collapse.html">
@@ -32,17 +32,17 @@ limitations under the License.
 tf-histogram-dashboard is a complete frontend that loads runs from a backend,
 and creates chart panes that display data for those runs.
 
-It provides a categorizer, run selector, and x type selector, by which the user
-can customize how data is organized and displayed.
+It provides a mode and time property selector, together with the selectors
+provided by tf-sidebar-helper, by which the user can customize how data is
+organized and displayed.
 
 Each chart has a button that can toggle whether it is "selected"; selectedRuns
 charts are larger.
 
 Organizationally, the #plumbing div contains components that have no concrete
-manifestation and just effect data bindings or data loading. The #sidebar contains
-shared controls like the tf-categorizer, tf-run-selector, and tf-x-type-selector.
-The #center div contains vz-histogram-timeseries embedded inside
-tf-collapsable-panes.
+manifestation and just effect data bindings or data loading. The .sidebar div
+contains shared controls provided by tf-sidebar-helper. The .center div
+contains vz-histogram-timeseries embedded inside a tf-panes-helper.
 -->
 <dom-module id="tf-histogram-dashboard">
   <template>
@@ -50,57 +50,52 @@ tf-collapsable-panes.
       <tf-color-scale
         id="colorScale"
         runs="[[runs]]"
-        out-color-scale="{{colorScale}}"
+        out-color-scale="{{_colorScale}}"
       ></tf-color-scale>
     </div>
 
     <tf-dashboard-layout>
       <div class="sidebar">
-        <div class="sidebar-section">
-          <tf-categorizer
-            id="categorizer"
-            tags="[[_visibleTags]]"
-            categories="{{categories}}"
-          ></tf-categorizer>
-        </div>
-        <div class="sidebar-section">
-          <tf-option-selector
-            id="histogramModeSelector"
-            name="Histogram Mode"
-            selected-id="{{_histogramMode}}"
-            >
-            <paper-button id="overlay">overlay</paper-button>
-            <paper-button id="offset">offset</paper-button>
-          </tf-option-selector>
-        </div>
-        <div class="sidebar-section">
-          <tf-option-selector
-            id="timePropertySelector"
-            name="Offset Time Axis"
-            selected-id="{{_timeProperty}}"
-            >
-            <paper-button id="step">step</paper-button>
-            <paper-button id="relative">relative</paper-button>
-            <paper-button id="wall_time">wall</paper-button>
-          </tf-option-selector>
-        </div>
-        <div class="sidebar-section">
-          <tf-run-selector
-            id="runSelector"
-            runs="[[runs]]"
-            color-scale="[[colorScale]]"
-            out-selected="{{selectedRuns}}"
-          ></tf-run-selector>
+        <tf-sidebar-helper
+          categories="{{_categories}}"
+          color-scale="[[_colorScale]]"
+          run2tag="[[run2tag]]"
+          runs="[[runs]]"
+          selected-runs="{{_selectedRuns}}"
+          show-download-links="{{_showDownloadLinks}}"
+          >
+          <div class="sidebar-section">
+            <tf-option-selector
+              id="histogramModeSelector"
+              name="Histogram Mode"
+              selected-id="{{_histogramMode}}"
+              >
+              <paper-button id="overlay">overlay</paper-button>
+              <paper-button id="offset">offset</paper-button>
+            </tf-option-selector>
           </div>
+          <div class="sidebar-section">
+            <tf-option-selector
+              id="timePropertySelector"
+              name="Offset Time Axis"
+              selected-id="{{_timeProperty}}"
+              >
+              <paper-button id="step">step</paper-button>
+              <paper-button id="relative">relative</paper-button>
+              <paper-button id="wall_time">wall</paper-button>
+            </tf-option-selector>
+          </div>
+        </tf-sidebar-helper>
       </div>
 
       <div class="center">
         <tf-panes-helper
-          categories="[[categories]]"
+          categories="[[_categories]]"
+          color-scale="[[_colorScale]]"
           data-type="[[dataType]]"
           data-provider="[[dataProvider]]"
           run2tag="[[run2tag]]"
-          selected-runs="[[selectedRuns]]"
+          selected-runs="[[_selectedRuns]]"
           repeat-for-runs
           >
           <template>
@@ -131,6 +126,10 @@ tf-collapsable-panes.
         TF.Backend.Behavior,
       ],
       properties: {
+        dataType: {
+          type: String,
+          value: "histogram"
+        },
         _histogramMode: {
           type: String,
           value: "offset"
@@ -139,60 +138,18 @@ tf-collapsable-panes.
           type: String,
           value: "step"
         },
-        _visibleTags: {
-          type: Array,
-          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
-        },
         _colorScaleFunction: {
           type: Function,
-          computed: "_getColorScaleFunction(colorScale)"
+          computed: "_getColorScaleFunction(_colorScale)"
         },
-        colorScale: Object,
-        dataType: {
-          type: String,
-          value: "histogram"
-        }
       },
       attached: function() {
         this.async(function() {
           this.fire("rendered");
         });
       },
-      _array: function(x) {
-        return [x];
-      },
-      _count: function(tags) {
-        var targetTags = {};
-        tags.forEach(function(t) {
-          targetTags[t] = true;
-        });
-        var count = 0;
-        var _this = this;
-        this.selectedRuns.forEach(function(r) {
-          _this.run2tag[r].forEach(function(t) {
-            if (targetTags[t]) {
-              count++;
-            }
-          });
-        });
-        return count;
-      },
-      _getVisibleTags: function() {
-        var keys = this.selectedRuns;
-        var dict = this.run2tag;
-        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
-      },
       _getColorScaleFunction: function() {
-        return this.colorScale.scale.bind(this.colorScale);
-      },
-      toggleSelected: function(e) {
-        var currentTarget = Polymer.dom(e.currentTarget);
-        var parentDiv = currentTarget.parentNode.parentNode;
-        parentDiv.classList.toggle("selected");
-        var chartScaffold = currentTarget.previousElementSibling;
-        if (chartScaffold) {
-          chartScaffold.chart().redraw();
-        }
+        return this._colorScale.scale.bind(this._colorScale);
       },
     });
   </script>
diff --git a/tensorflow/tensorboard/components/tf-image-dashboard/test/data/runs.json b/tensorflow/tensorboard/components/tf-image-dashboard/test/data/runs.json
deleted file mode 100644
index 23ba621b33c..00000000000
--- a/tensorflow/tensorboard/components/tf-image-dashboard/test/data/runs.json
+++ /dev/null
@@ -1 +0,0 @@
-{"run1": {"images": ["foo", "bar"]}, "run2": {"images": ["bar", "zod"]}}
diff --git a/tensorflow/tensorboard/components/tf-image-dashboard/test/imageDashboardTests.ts b/tensorflow/tensorboard/components/tf-image-dashboard/test/imageDashboardTests.ts
deleted file mode 100644
index f903326428e..00000000000
--- a/tensorflow/tensorboard/components/tf-image-dashboard/test/imageDashboardTests.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-declare function stub(el: string, obj: any): void;
-
-    describe('image dashboard tests', function() {
-      var imageDash;
-      var reloadCount = 0;
-      beforeEach(function() {
-        imageDash = fixture('testElementFixture');
-        var router = TF.Backend.router('data', true);
-        var backend = new TF.Backend.Backend(router);
-        imageDash.backend = backend;
-        stub('tf-image-loader', {
-          reload: function() { reloadCount++; },
-        });
-      });
-
-      it('calling reload on dashboard reloads the image-loaders',
-         function(done) {
-           imageDash.backendReload().then(() => {
-             reloadCount = 0;
-             var loaders = [].slice.call(
-                 imageDash.getElementsByTagName('tf-image-loader'));
-             imageDash.frontendReload();
-             setTimeout(function() {
-               assert.isAbove(reloadCount, 3);
-               done();
-             });
-           });
-         });
-    });
diff --git a/tensorflow/tensorboard/components/tf-image-dashboard/test/index.html b/tensorflow/tensorboard/components/tf-image-dashboard/test/index.html
deleted file mode 100644
index c0c84d1e714..00000000000
--- a/tensorflow/tensorboard/components/tf-image-dashboard/test/index.html
+++ /dev/null
@@ -1,35 +0,0 @@
-<!doctype html>
-<!--
-@license
-Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
--->
-
-<html>
-<head>
-  <script src="../../webcomponentsjs/webcomponents-lite.min.js"></script>
-    <script src="../../web-component-tester/browser.js"></script>
-    <link rel="import" href="../../tf-imports/d3.html">
-    <link rel="import" href="../tf-image-dashboard.html">
-    <link rel="stylesheet" type="text/css" href="../../../lib/css/global.css">
-</head>
-<body>
-  <test-fixture id="testElementFixture">
-    <template>
-      <tf-image-dashboard></tf-image-dashboard>
-    </template>
-  </test-fixture>
-  <script src="imageDashboardTests.js"></script>
-</body>
-</html>
diff --git a/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-dashboard.html b/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-dashboard.html
index 74eb737148a..2f38f54c426 100644
--- a/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-dashboard.html
+++ b/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-dashboard.html
@@ -16,65 +16,81 @@ limitations under the License.
 -->
 
 <link rel="import" href="../polymer/polymer.html">
-<link rel="import" href="../paper-icon-button/paper-icon-button.html">
-<link rel="import" href="tf-image-grid.html">
-<link rel="import" href="../tf-dashboard-common/tf-dashboard.html">
 <link rel="import" href="../tf-backend/tf-backend.html">
+<link rel="import" href="../tf-color-scale/tf-color-scale.html">
+<link rel="import" href="../tf-dashboard-common/tf-dashboard.html">
+<link rel="import" href="../tf-dashboard-common/tf-panes-helper.html">
+<link rel="import" href="../tf-dashboard-common/tf-sidebar-helper.html">
+<link rel="import" href="tf-image-loader.html">
 
 <!--
 tf-image-dashboard displays a dashboard that loads images from a TensorFlow run.
 -->
 <dom-module id="tf-image-dashboard">
   <template>
-    <div class="center">
-      <tf-no-data-warning
-        data-type="image"
-        show-warning="[[dataNotFound]]"
-      ></tf-no-data-warning>
-      <tf-image-grid
-        id="imagegrid"
-        run-to-images="[[run2tag]]"
-        images-generator="[[dataProvider]]"
-        tags="[[tags]]"
+    <div id="plumbing">
+      <tf-color-scale
+        id="colorScale"
         runs="[[runs]]"
-      ></tf-image-grid>
+        out-color-scale="{{_colorScale}}"
+        ></tf-color-scale>
     </div>
 
+    <tf-dashboard-layout>
+      <div class="sidebar">
+        <tf-sidebar-helper
+          categories="{{_categories}}"
+          color-scale="[[_colorScale]]"
+          run2tag="[[run2tag]]"
+          runs="[[runs]]"
+          selected-runs="{{_selectedRuns}}"
+          >
+        </tf-sidebar-helper>
+      </div>
+      <div class="center">
+        <tf-panes-helper
+          categories="[[_categories]]"
+          color-scale="[[_colorScale]]"
+          data-type="[[dataType]]"
+          data-provider="[[dataProvider]]"
+          run2tag="[[run2tag]]"
+          selected-runs="[[_selectedRuns]]"
+          repeat-for-runs
+          >
+          <template>
+            <tf-image-loader color-scale="[[_colorScale]]"></tf-image-loader>
+          </template>
+        </tf-panes-helper>
+      </div>
+    </tf-dashboard-layout>
+    <style include="dashboard-style"></style>
     <style>
-      .center {
-        height: 100%;
-        width: 100%;
-        -webkit-box-sizing: border-box;
-        -moz-box-sizing: border-box;
-        box-sizing: border-box;
+      tf-panes-helper {
+        --card-width: 340px;
+        --card-height: auto;
+        --card-expanded-width: 700px;
+        --card-expanded-height: auto;
       }
-      :host {
-        height: 100%;
-        display: block;
-      }
-
     </style>
   </template>
   <script>
     Polymer({
       is: "tf-image-dashboard",
       properties: {
-        dataType: {value: "image"},
+        dataType: {
+          type: String,
+          value: "image"
+        },
       },
       behaviors: [
-        TF.Dashboard.ReloadBehavior("tf-image-loader"),
-        TF.Backend.Behavior
+          TF.Dashboard.ReloadBehavior("tf-chart-scaffold"),
+          TF.Backend.Behavior,
       ],
       attached: function() {
         this.async(function() {
           this.fire("rendered");
         });
       },
-      _hasImages: function(runToImagesChange) {
-        return _.values(runToImagesChange.base).some(function(arr) {
-          return arr.length > 0;
-        });
-      },
     });
   </script>
 </dom-module>
diff --git a/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-grid.html b/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-grid.html
deleted file mode 100644
index d34968ca707..00000000000
--- a/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-grid.html
+++ /dev/null
@@ -1,173 +0,0 @@
-<!--
-@license
-Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
--->
-
-<link rel="import" href="../polymer/polymer.html">
-<link rel="import" href="../paper-styles/paper-styles.html">
-<link rel="import" href="tf-image-loader.html">
-<link rel="import" href="../tf-imports/lodash.html">
-<link rel="import" href="../tf-dashboard-common/scrollbar-style.html">
-
-<!--
-tf-image-grid creates a grid for examining image data. The columns correspond
-to runs and the rows correspond to tags. Each cell is an image.
-
-Structurally, it makes extensive use of flexbox for layout: it has a top-level
-columnar flexbox that contains the topRow (run names) and then a
-bottomContainer. The bottomContainer is another columnar flexbox which contains
-repeated image-rows. Each image-row is a row flexbox which contains a tag name
-cell, and then image cells.
-
-In the future, we should improve on the layout by making the tag names and run names have fixed positions
-within the image-grid, so that when you scroll you always have context (e.g. row and column names in a spreadsheet).
-For now, it just scrolls.
-
-The image grid provides internal scroll bars (with styling) so that it can be dropped into
-a dashboard in a predictable fashion, even though the internal image grid may be enormous.
-
-Room for future improvement:
-
-- Make it obvious when an image didn't load due to the image not existing.
-- Find some way to collapse sparse image grids into denser ones (when sparsity
-is high)
-- Fix column/row names
-- Include hook for loading past images (by step/timestamp? or index?)
-
--->
-<dom-module id="tf-image-grid">
-  <template>
-    <style include="scrollbar-style"></style>
-    <div id="fullContainer" class="container scrollbar">
-      <div id="topRow" class="container">
-        <div class="noshrink cell" id="paddingCell"></div>
-        <template
-          is="dom-repeat"
-          items="[[runs]]"
-          as="run"
-        >
-        <div class="run-name-cell cell noshrink">
-          <span>[[run]]</span>
-        </div>
-      </template>
-      </div>
-      <div id="bottomContainer" class="container">
-        <template
-          is="dom-repeat"
-          items="[[tags]]"
-          as="tag"
-        >
-          <div class="image-row container noshrink">
-            <div class="tag-name-cell cell noshrink">
-              <span class="tag-name">[[tag]]</span>
-            </div>
-            <template
-              is="dom-repeat"
-              items="[[runs]]"
-              as="run"
-            >
-              <div class="image-cell cell noshrink">
-                <template is="dom-if" if="[[_exists(run, tag, runToImages.*)]]">
-                  <tf-image-loader
-                    id="loader"
-                    run="[[run]]"
-                    tag="[[tag]]"
-                    images-generator="[[imagesGenerator]]"
-                  >
-                  </tf-image-loader>
-                </template>
-              </div>
-            </template>
-          </div>
-        </template>
-      </div>
-    </div>
-    <style>
-      :host {
-        display: block;
-        height: 100%;
-      }
-      .container {
-        display: flex;
-        flex-wrap: nowrap;
-      }
-      #fullContainer {
-        width: 100%;
-        height: 100%;
-        flex-direction: column;
-        padding-top: 20px;
-        overflow: auto;
-        -webkit-box-sizing: border-box;
-        -moz-box-sizing: border-box;
-        box-sizing: border-box;
-      }
-      #topRow {
-        flex-direction: row;
-      }
-      #bottomContainer {
-        flex-direction: column;
-        height: 100%;
-        width: 100%;
-      }
-      .cell {
-        margin-right: 10px;
-      }
-      .image-row {
-        flex-direction: row;
-        padding-top: 10px;
-      }
-      .image-cell {
-        width: 300px;
-      }
-      .tag-name-cell {
-        width: 300px;
-        display:flex;
-        flex-direction: column;
-        justify-content: center;
-      }
-      .tag-name {
-        word-wrap: break-word;
-        text-align: center;
-        white-space: nowrap;
-      }
-      .run-name-cell {
-        width: 300px;
-        word-break:break-all;
-        text-align: center;
-      }
-      .noshrink {
-        flex-shrink: 0;
-      }
-      #paddingCell {
-        width: 300px;
-        height: 30px;
-      }
-    </style>
-  </template>
-  <script>
-    Polymer({
-      is: "tf-image-grid",
-      properties: {
-        runToImages: Object,
-        tags: Array,
-        runs: Array,
-        imagesGenerator: Function,
-      },
-      _exists: function (run, tag) {
-        return this.runToImages[run].indexOf(tag) !== -1;
-      },
-    });
-  </script>
-</dom-module>
diff --git a/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-loader.html b/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-loader.html
index 5156af5110d..b2e86bc6727 100644
--- a/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-loader.html
+++ b/tensorflow/tensorboard/components/tf-image-dashboard/tf-image-loader.html
@@ -17,6 +17,7 @@ limitations under the License.
 
 <link rel="import" href="../polymer/polymer.html">
 <link rel="import" href="../tf-imports/lodash.html">
+<link rel="import" href="../tf-imports/d3.html">
 
 <!--
 tf-image-loader loads an individual image from the TensorBoard backend.
@@ -25,49 +26,45 @@ Right now it always loads the most recent image. We should add support in the
 future for loading older images.
 -->
 <dom-module id="tf-image-loader">
-  <style>
-  :host {
-    display: block;
-  }
-  img {
-    image-rendering: pixelated;
-    border: 1px solid #555;
-  }
-  </style>
   <template>
-    <img src="[[imageUrl]]"
-         width="300"
-         height="[[height]]"
-         on-error="reload">
+    <img
+      id="img"
+      src="[[imageUrl]]"
+      on-error="reload">
+    <style>
+      :host {
+        display: block;
+        width: 100%;
+        height: auto;
+        position: relative;
+      }
+
+      img {
+        image-rendering: pixelated;
+        display: block;
+        width: 100%;
+        height: auto;
+      }
+    </style>
   </template>
   <script>
     Polymer({
       is: "tf-image-loader",
       properties: {
-        run: String,
-        tag: String,
-        imagesGenerator: Function,
+        colorScale: Object,
         imageUrl: String,
-        height: {
-          type: Number,
-          value: 300
-        }
       },
-      reload: function() {
-        var _this = this;
-        this.imageUrl = ""; // force reload
-        this.imagesGenerator(this.tag, this.run).then(function(metadatas) {
-          var last_metadata = _.last(metadatas);
-          _this.imageUrl = last_metadata.url;
-          _this.height = 300 / last_metadata.width * last_metadata.height;
-        });
+      setVisibleSeries: function(runs) {
+        // Do nothing.
       },
-      ready: function() {
-        // Need to test so that it will not error if it is constructed w/o
-        // all properties (so that it's possible to use stub to mock it out)
-        if (this.run != null && this.tag != null && this.imagesGenerator != null) {
-          this.reload();
-        }
+      setSeriesData: function(run, data) {
+        var last = _.last(data);
+        this.redraw(last.url);
+      },
+      redraw: function(newUrl) {
+        var url = newUrl || this.imageUrl;
+        this.imageUrl = ""; // Force redraw
+        this.imageUrl = url;
       },
     });
   </script>
diff --git a/tensorflow/tensorboard/components/vz-data-summary/vz-data-summary.ts b/tensorflow/tensorboard/components/vz-data-summary/vz-data-summary.ts
index 7a8c68ee4e0..9895f01631d 100644
--- a/tensorflow/tensorboard/components/vz-data-summary/vz-data-summary.ts
+++ b/tensorflow/tensorboard/components/vz-data-summary/vz-data-summary.ts
@@ -130,7 +130,8 @@ function getTextColor(hexTripletColor: string) {
       color = convertedHex;
     } else {
       // RGB string is currently not handled.
-      console.log(
+      /* tslint:disable:no-console */
+      console.warn(
           'WARNING: Could not convert color to hex,' +
           'please specify color as name or hex string.');
       return 'black';
diff --git a/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.html b/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.html
index 48a7723f82d..0963d126e92 100644
--- a/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.html
+++ b/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.html
@@ -87,7 +87,7 @@ such as different X scales (linear and temporal), tooltips and smoothing.
         padding: 8px;
         z-index: 5;
         cursor: none;
-        margin-top: 5px;
+        margin-top: 10px;
       }
       .swatch {
         border-radius: 50%;
diff --git a/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.ts b/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.ts
index 8c607b97afd..7144910e352 100644
--- a/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.ts
+++ b/tensorflow/tensorboard/components/vz-line-chart/vz-line-chart.ts
@@ -450,7 +450,8 @@ module VZ {
       return this.name2datasets[name];
     }
 
-    static getYScaleFromType(yScaleType: string): Plottable.QuantitativeScale<number> {
+    static getYScaleFromType(yScaleType: string):
+        Plottable.QuantitativeScale<number> {
       if (yScaleType === 'log') {
         return new Plottable.Scales.ModifiedLog();
       } else if (yScaleType === 'linear') {
diff --git a/tensorflow/tensorboard/components/vz-projector/bh_tsne.ts b/tensorflow/tensorboard/components/vz-projector/bh_tsne.ts
index 95c207ec6cb..35a8328cd6d 100644
--- a/tensorflow/tensorboard/components/vz-projector/bh_tsne.ts
+++ b/tensorflow/tensorboard/components/vz-projector/bh_tsne.ts
@@ -67,10 +67,9 @@ function zerosArray(length: number): number[] {
   return result;
 }
 
-
 /** Returns the square euclidean distance between two vectors. */
 export function dist2(a: number[], b: number[]): number {
-  if (a.length != b.length) {
+  if (a.length !== b.length) {
     throw new Error('Vectors a and b must be of same length');
   }
 
@@ -105,7 +104,7 @@ function gaussRandom(rng: () => number): number {
   let u = 2 * rng() - 1;
   let v = 2 * rng() - 1;
   let r = u * u + v * v;
-  if (r == 0 || r > 1) {
+  if (r === 0 || r > 1) {
     return gaussRandom(rng);
   }
   let c = Math.sqrt(-2 * Math.log(r) / r);
@@ -173,7 +172,7 @@ function nearest2P(
       let psum = 0.0;
       for (let k = 0; k < neighbors.length; ++k) {
         let neighbor = neighbors[k];
-        let pij = (i == neighbor.index) ? 0 : Math.exp(-neighbor.dist * beta);
+        let pij = (i === neighbor.index) ? 0 : Math.exp(-neighbor.dist * beta);
         pRow[k] = pij;
         psum += pij;
       }
@@ -273,9 +272,9 @@ export class TSNE {
     this.epsilon = opt.epsilon || 10;
     this.rng = opt.rng || Math.random;
     this.dim = opt.dim;
-    if (opt.dim == 2) {
+    if (opt.dim === 2) {
       this.dist2 = dist2_2D;
-    } else if (opt.dim == 3) {
+    } else if (opt.dim === 3) {
       this.dist2 = dist2_3D;
     } else {
       this.dist2 = dist2;
@@ -376,7 +375,7 @@ export class TSNE {
     let annotateTree =
         (node: AugmSPNode): {numCells: number, yCell: number[]} => {
           let numCells = node.points ? node.points.length : 0;
-          if (node.children == null) {
+          if (node.children === null || node.children === undefined) {
             // Update the current node and tell the parent.
             node.numCells = numCells;
             // TODO(smilkov): yCell should be average across all points.
@@ -388,7 +387,7 @@ export class TSNE {
               node.points ? node.points[0].slice() : zerosArray(this.dim);
           for (let i = 0; i < node.children.length; ++i) {
             let child = node.children[i];
-            if (child == null) {
+            if (child === null || child === undefined) {
               continue;
             }
             let result = annotateTree(child as AugmSPNode);
@@ -433,7 +432,7 @@ export class TSNE {
       tree.visit((node: AugmSPNode) => {
         let squaredDistToCell = this.dist2(pointI, node.yCell);
         // Squared distance from point i to cell.
-        if (node.children == null ||
+        if (node.children === null || node.children === undefined ||
             (node.rCell / Math.sqrt(squaredDistToCell) < THETA)) {
           let qijZ = 1 / (1 + squaredDistToCell);
           let dZ = node.numCells * qijZ;
diff --git a/tensorflow/tensorboard/components/vz-projector/data.ts b/tensorflow/tensorboard/components/vz-projector/data.ts
index dc23defb4a8..0a3e6155db5 100644
--- a/tensorflow/tensorboard/components/vz-projector/data.ts
+++ b/tensorflow/tensorboard/components/vz-projector/data.ts
@@ -20,7 +20,6 @@ import * as scatter from './scatter';
 import {shuffle} from './util';
 import * as vector from './vector';
 
-
 /**
  * A DataSource is our ground truth data. The original parsed data should never
  * be modified, only copied out.
@@ -74,7 +73,6 @@ function hasWebGLSupport(): boolean {
 }
 
 const WEBGL_SUPPORT = hasWebGLSupport();
-const MAX_TSNE_ITERS = 500;
 /**
  * Sampling is used when computing expensive operations such as PCA, or T-SNE.
  */
@@ -109,6 +107,7 @@ export class DataSet implements scatter.DataSet {
   tSNEShouldStop = true;
   dim = [0, 0];
   private tsne: TSNE;
+  private hasTSNERun: boolean = false;
 
   /**
    * Creates a new Dataset by copying out data from an array of datapoints.
@@ -184,7 +183,6 @@ export class DataSet implements scatter.DataSet {
     return traces;
   }
 
-
   /**
    * Computes the centroid, shifts all points to that centroid,
    * then makes them all unit norm.
@@ -239,10 +237,22 @@ export class DataSet implements scatter.DataSet {
         return newV;
       });
       for (let j = 0; j < NUM_PCA_COMPONENTS; j++) {
-        let label = 'pca-' + j;
-        this.projections.add(label);
-        this.points.forEach(
-            (d, i) => { d.projections[label] = pcaVectors[i][j]; });
+        let labels = ['pca-' + j];
+        // If t-SNE hasn't run, initialize those projections with PCA
+        // projections so we see something when going to the t-SNE view.
+        if (!this.hasTSNERun && j < 3) {
+          labels.push('tsne-' + j);
+        }
+
+        for (let i = 0; i < labels.length; i++) {
+          this.projections.add(labels[i]);
+        }
+
+        this.points.forEach((d, i) => {
+          for (let k = 0; k < labels.length; k++) {
+            d.projections[labels[k]] = pcaVectors[i][j];
+          }
+        });
       }
     });
   }
@@ -251,6 +261,7 @@ export class DataSet implements scatter.DataSet {
   projectTSNE(
       perplexity: number, learningRate: number, tsneDim: number,
       stepCallback: (iter: number) => void) {
+    this.hasTSNERun = true;
     let k = Math.floor(3 * perplexity);
     let opt = {epsilon: learningRate, perplexity: perplexity, dim: tsneDim};
     this.tsne = new TSNE(opt);
@@ -258,7 +269,7 @@ export class DataSet implements scatter.DataSet {
     let iter = 0;
 
     let step = () => {
-      if (this.tSNEShouldStop || iter > MAX_TSNE_ITERS) {
+      if (this.tSNEShouldStop) {
         stepCallback(null);
         return;
       }
@@ -299,7 +310,6 @@ export class DataSet implements scatter.DataSet {
         this.tsne.initDataDist(this.nearest);
       }).then(step);
 
-
     });
   }
 
diff --git a/tensorflow/tensorboard/components/vz-projector/data_test.ts b/tensorflow/tensorboard/components/vz-projector/data_test.ts
index 07286f8bcc7..28041b1f475 100644
--- a/tensorflow/tensorboard/components/vz-projector/data_test.ts
+++ b/tensorflow/tensorboard/components/vz-projector/data_test.ts
@@ -15,7 +15,6 @@ limitations under the License.
 
 import {DataPoint, DataSet} from './data';
 
-
 /**
  * Helper method that makes a list of points given an array of
  * trace indexes.
diff --git a/tensorflow/tensorboard/components/vz-projector/external.d.ts b/tensorflow/tensorboard/components/vz-projector/external.d.ts
index de7b9607971..ae746c4d95b 100644
--- a/tensorflow/tensorboard/components/vz-projector/external.d.ts
+++ b/tensorflow/tensorboard/components/vz-projector/external.d.ts
@@ -37,4 +37,4 @@ interface Weblas {
 }
 
 declare let numeric: any;
-declare let weblas: Weblas;
\ No newline at end of file
+declare let weblas: Weblas;
diff --git a/tensorflow/tensorboard/components/vz-projector/heap.ts b/tensorflow/tensorboard/components/vz-projector/heap.ts
index 35f178e0007..ac3144e6493 100644
--- a/tensorflow/tensorboard/components/vz-projector/heap.ts
+++ b/tensorflow/tensorboard/components/vz-projector/heap.ts
@@ -90,7 +90,7 @@ export class MinHeap<T> {
         this.arr[right].key < this.arr[largest].key) {
       largest = right;
     }
-    if (largest != pos) {
+    if (largest !== pos) {
       this.swap(largest, pos);
       this.bubbleDown(largest);
     }
@@ -141,6 +141,6 @@ export class KMin<T> {
 
   /** Returns the largest key in the list. */
   getLargestKey(): number {
-    return this.maxHeap.size() == 0 ? null : -this.maxHeap.peek().key;
+    return this.maxHeap.size() === 0 ? null : -this.maxHeap.peek().key;
   }
 }
diff --git a/tensorflow/tensorboard/components/vz-projector/knn.ts b/tensorflow/tensorboard/components/vz-projector/knn.ts
index 4d64595a3fd..963845e76f5 100644
--- a/tensorflow/tensorboard/components/vz-projector/knn.ts
+++ b/tensorflow/tensorboard/components/vz-projector/knn.ts
@@ -212,7 +212,7 @@ export function findKNNofPoint<T>(
   let kMin = new KMin<NearestEntry>(k);
   let a = accessor(dataPoints[pointIndex]);
   for (let i = 0; i < dataPoints.length; ++i) {
-    if (i == pointIndex) {
+    if (i === pointIndex) {
       continue;
     }
     let b = accessor(dataPoints[i]);
diff --git a/tensorflow/tensorboard/components/vz-projector/label.ts b/tensorflow/tensorboard/components/vz-projector/label.ts
index 9689ef58697..c041a6c5cb5 100644
--- a/tensorflow/tensorboard/components/vz-projector/label.ts
+++ b/tensorflow/tensorboard/components/vz-projector/label.ts
@@ -148,4 +148,4 @@ export class CollisionGrid {
   private getCellY(y: number) {
     return Math.floor((y - this.bound.loY) / this.cellHeight);
   };
-}
\ No newline at end of file
+}
diff --git a/tensorflow/tensorboard/components/vz-projector/scatter.ts b/tensorflow/tensorboard/components/vz-projector/scatter.ts
index 392d8085c68..c6c6590272d 100644
--- a/tensorflow/tensorboard/components/vz-projector/scatter.ts
+++ b/tensorflow/tensorboard/components/vz-projector/scatter.ts
@@ -21,7 +21,6 @@ export interface Point3D {
   /** Original z coordinate. */
   z: number;
 }
-;
 
 /** The spacial data of points and lines that will be shown in the projector. */
 export interface DataSet {
@@ -74,7 +73,7 @@ export interface Scatter {
   setMode(mode: Mode): void;
   /** Returns the interaction mode. */
   getMode(): Mode;
-  /** Resets the zoom level to 1.*/
+  /** Resets the zoom level to 1. */
   resetZoom(): void;
   /**
    * Increases/decreases the zoom level.
diff --git a/tensorflow/tensorboard/components/vz-projector/scatterWebGL.ts b/tensorflow/tensorboard/components/vz-projector/scatterWebGL.ts
index 79cee64c5b9..a4608b571c9 100644
--- a/tensorflow/tensorboard/components/vz-projector/scatterWebGL.ts
+++ b/tensorflow/tensorboard/components/vz-projector/scatterWebGL.ts
@@ -219,6 +219,8 @@ export class ScatterWebGL implements Scatter {
   private pointSize3D: number;
   /** The buffer attribute that holds the positions of the points. */
   private positionBufferArray: THREE.BufferAttribute;
+  private tracePositionBufferArray: {[trace: number]:
+                                         THREE.BufferAttribute} = {};
 
   // Accessors for rendering and labeling the points.
   private xAccessor: (index: number) => number;
@@ -313,13 +315,38 @@ export class ScatterWebGL implements Scatter {
 
   /** Updates the positions buffer array to reflect the actual data. */
   private updatePositionsArray() {
+    // Update the points.
     for (let i = 0; i < this.dataSet.points.length; i++) {
       // Set position based on projected point.
       let pp = this.dataSet.points[i].projectedPoint;
       this.positionBufferArray.setXYZ(i, pp.x, pp.y, pp.z);
     }
+
+    // Update the traces.
+    for (let i = 0; i < this.dataSet.traces.length; i++) {
+      let dataTrace = this.dataSet.traces[i];
+
+      let vertexCount = 0;
+      for (let j = 0; j < dataTrace.pointIndices.length - 1; j++) {
+        let point1 = this.dataSet.points[dataTrace.pointIndices[j]];
+        let point2 = this.dataSet.points[dataTrace.pointIndices[j + 1]];
+
+        this.tracePositionBufferArray[i].setXYZ(
+            vertexCount, point1.projectedPoint.x, point1.projectedPoint.y,
+            point1.projectedPoint.z);
+        this.tracePositionBufferArray[i].setXYZ(
+            vertexCount + 1, point2.projectedPoint.x, point2.projectedPoint.y,
+            point2.projectedPoint.z);
+        vertexCount += 2;
+      }
+    }
+
     if (this.geometry) {
       this.positionBufferArray.needsUpdate = true;
+
+      for (let i = 0; i < this.dataSet.traces.length; i++) {
+        this.tracePositionBufferArray[i].needsUpdate = true;
+      }
     }
   }
 
@@ -539,23 +566,12 @@ export class ScatterWebGL implements Scatter {
       let dataTrace = this.dataSet.traces[i];
 
       let geometry = new THREE.BufferGeometry();
-      let vertices: number[] = [];
       let colors: number[] = [];
 
       for (let j = 0; j < dataTrace.pointIndices.length - 1; j++) {
         this.dataSet.points[dataTrace.pointIndices[j]].traceIndex = i;
         this.dataSet.points[dataTrace.pointIndices[j + 1]].traceIndex = i;
 
-        let point1 = this.dataSet.points[dataTrace.pointIndices[j]];
-        let point2 = this.dataSet.points[dataTrace.pointIndices[j + 1]];
-
-        vertices.push(
-            point1.projectedPoint.x, point1.projectedPoint.y,
-            point1.projectedPoint.z);
-        vertices.push(
-            point2.projectedPoint.x, point2.projectedPoint.y,
-            point2.projectedPoint.z);
-
         let color1 =
             this.getPointInTraceColor(j, dataTrace.pointIndices.length);
         let color2 =
@@ -566,9 +582,9 @@ export class ScatterWebGL implements Scatter {
             color2.g / 255, color2.b / 255);
       }
 
-      geometry.addAttribute(
-          'position',
-          new THREE.BufferAttribute(new Float32Array(vertices), XYZ_NUM_BYTES));
+      geometry.addAttribute('position', this.tracePositionBufferArray[i]);
+      this.tracePositionBufferArray[i].needsUpdate = true;
+
       geometry.addAttribute(
           'color',
           new THREE.BufferAttribute(new Float32Array(colors), RGB_NUM_BYTES));
@@ -724,7 +740,7 @@ export class ScatterWebGL implements Scatter {
       }
     } else if (
         !e.ctrlKey &&
-        this.cameraControls.mouseButtons.ORBIT == THREE.MOUSE.RIGHT) {
+        this.cameraControls.mouseButtons.ORBIT === THREE.MOUSE.RIGHT) {
       // The user happened to press the ctrl key when the tab was active,
       // unpressed the ctrl when the tab was inactive, and now he/she
       // is back to the projector tab.
@@ -732,14 +748,13 @@ export class ScatterWebGL implements Scatter {
       this.cameraControls.mouseButtons.PAN = THREE.MOUSE.RIGHT;
     } else if (
         e.ctrlKey &&
-        this.cameraControls.mouseButtons.ORBIT == THREE.MOUSE.LEFT) {
+        this.cameraControls.mouseButtons.ORBIT === THREE.MOUSE.LEFT) {
       // Similarly to the situation above.
       this.cameraControls.mouseButtons.ORBIT = THREE.MOUSE.RIGHT;
       this.cameraControls.mouseButtons.PAN = THREE.MOUSE.LEFT;
     }
   }
 
-
   /** When we stop dragging/zooming, return to normal behavior. */
   private onMouseUp(e: any) {
     if (this.selecting) {
@@ -777,7 +792,7 @@ export class ScatterWebGL implements Scatter {
     } else if (!this.mouseIsDown) {
       let lastNearestPoint = this.nearestPoint;
       this.setNearestPointToMouse(e);
-      if (lastNearestPoint != this.nearestPoint) {
+      if (lastNearestPoint !== this.nearestPoint) {
         this.onHoverListeners.forEach(l => l(this.nearestPoint));
       }
     }
@@ -833,7 +848,7 @@ export class ScatterWebGL implements Scatter {
     // Interpret the pixel as an ID.
     let id = (pixelBuffer[0] << 16) | (pixelBuffer[1] << 8) | pixelBuffer[2];
     this.nearestPoint =
-        id != 0xffffff && id < this.dataSet.points.length ? id : null;
+        id !== 0xffffff && id < this.dataSet.points.length ? id : null;
   }
 
   /** Returns the squared distance to the mouse for the i-th point. */
@@ -1304,10 +1319,21 @@ export class ScatterWebGL implements Scatter {
   setDataSet(dataSet: DataSet, spriteImage: HTMLImageElement) {
     this.dataSet = dataSet;
     this.calibratePointSize();
+
     let positions =
         new Float32Array(this.dataSet.points.length * XYZ_NUM_BYTES);
     this.positionBufferArray =
         new THREE.BufferAttribute(positions, XYZ_NUM_BYTES);
+
+    // Set up the position buffer arrays for each trace.
+    for (let i = 0; i < this.dataSet.traces.length; i++) {
+      let dataTrace = this.dataSet.traces[i];
+      let traces = new Float32Array(
+          2 * (dataTrace.pointIndices.length - 1) * XYZ_NUM_BYTES);
+      this.tracePositionBufferArray[i] =
+          new THREE.BufferAttribute(traces, XYZ_NUM_BYTES);
+    }
+
     this.image = spriteImage;
     this.shuffledData = new Array(this.dataSet.points.length);
     for (let i = 0; i < this.dataSet.points.length; i++) {
diff --git a/tensorflow/tensorboard/components/vz-projector/util.ts b/tensorflow/tensorboard/components/vz-projector/util.ts
index 975f7e122cb..5941c4174d9 100644
--- a/tensorflow/tensorboard/components/vz-projector/util.ts
+++ b/tensorflow/tensorboard/components/vz-projector/util.ts
@@ -40,4 +40,4 @@ export function assert(condition: boolean, message?: string) {
     message = message || 'Assertion failed';
     throw new Error(message);
   }
-}
\ No newline at end of file
+}
diff --git a/tensorflow/tensorboard/components/vz-projector/vector.ts b/tensorflow/tensorboard/components/vz-projector/vector.ts
index edb6e9bdd03..82fef77b7b0 100644
--- a/tensorflow/tensorboard/components/vz-projector/vector.ts
+++ b/tensorflow/tensorboard/components/vz-projector/vector.ts
@@ -15,7 +15,6 @@ limitations under the License.
 
 import {assert} from './util';
 
-
 /**
  * @fileoverview Useful vector utilities.
  */
@@ -25,7 +24,7 @@ export type Point2D = [number, number];
 
 /** Returns the dot product of two vectors. */
 export function dot(a: Vector, b: Vector): number {
-  assert(a.length == b.length, 'Vectors a and b must be of same length');
+  assert(a.length === b.length, 'Vectors a and b must be of same length');
   let result = 0;
   for (let i = 0; i < a.length; ++i) {
     result += a[i] * b[i];
@@ -44,7 +43,7 @@ export function sum(a: Vector): number {
 
 /** Returns the sum of two vectors, i.e. a + b */
 export function add(a: Vector, b: Vector): Vector {
-  assert(a.length == b.length, 'Vectors a and b must be of same length');
+  assert(a.length === b.length, 'Vectors a and b must be of same length');
   let result = new Array(a.length);
   for (let i = 0; i < a.length; ++i) {
     result[i] = a[i] + b[i];
@@ -54,7 +53,7 @@ export function add(a: Vector, b: Vector): Vector {
 
 /** Subtracts vector b from vector a, i.e. returns a - b */
 export function sub(a: Vector, b: Vector): Vector {
-  assert(a.length == b.length, 'Vectors a and b must be of same length');
+  assert(a.length === b.length, 'Vectors a and b must be of same length');
   let result = new Array(a.length);
   for (let i = 0; i < a.length; ++i) {
     result[i] = a[i] - b[i];
@@ -78,7 +77,7 @@ export function dist(a: Vector, b: Vector): number {
 
 /** Returns the square euclidean distance between two vectors. */
 export function dist2(a: Vector, b: Vector): number {
-  assert(a.length == b.length, 'Vectors a and b must be of same length');
+  assert(a.length === b.length, 'Vectors a and b must be of same length');
   let result = 0;
   for (let i = 0; i < a.length; ++i) {
     let diff = a[i] - b[i];
@@ -107,7 +106,7 @@ export function dist2_3D(a: Vector, b: Vector): number {
  * exit (returns -1) if the distance is >= to the provided limit.
  */
 export function dist2WithLimit(a: Vector, b: Vector, limit: number): number {
-  assert(a.length == b.length, 'Vectors a and b must be of same length');
+  assert(a.length === b.length, 'Vectors a and b must be of same length');
   let result = 0;
   for (let i = 0; i < a.length; ++i) {
     let diff = a[i] - b[i];
@@ -184,7 +183,7 @@ export type Predicate<T> = (a: T) => boolean;
 export function centroid<T>(
     dataPoints: T[], predicate: Predicate<T>,
     accessor?: (a: T) => Vector): {centroid: Vector, numMatches: number} {
-  if (accessor == null) {
+  if (accessor === null || accessor === undefined) {
     accessor = (a: T) => <any>a;
   }
   assert(dataPoints.length >= 0, '`vectors` must be of length >= 1');
@@ -201,7 +200,7 @@ export function centroid<T>(
       centroid[j] += vector[j];
     }
   }
-  if (n == 0) {
+  if (n === 0) {
     return {centroid: null, numMatches: 0};
   }
   for (let j = 0; j < centroid.length; ++j) {
diff --git a/tensorflow/tensorboard/components/vz-projector/vz-projector-data-loader.ts b/tensorflow/tensorboard/components/vz-projector/vz-projector-data-loader.ts
index f44bb6bbe97..6b7515c2ddf 100644
--- a/tensorflow/tensorboard/components/vz-projector/vz-projector-data-loader.ts
+++ b/tensorflow/tensorboard/components/vz-projector/vz-projector-data-loader.ts
@@ -17,7 +17,6 @@ import {runAsyncTask, updateMessage} from './async';
 import {DataPoint, DataSet, DatasetMetadata, DataSource} from './data';
 import {PolymerElement} from './vz-projector-util';
 
-
 /** Prefix added to the http requests when asking the server for data. */
 const DATA_URL = 'data';
 
@@ -145,7 +144,7 @@ class DataLoader extends DataLoaderPolymer {
     let names = Object.keys(info.tensors)
                     .filter(name => {
                       let shape = info.tensors[name];
-                      return shape.length == 2 && shape[0] > 1 && shape[1] > 1;
+                      return shape.length === 2 && shape[0] > 1 && shape[1] > 1;
                     })
                     .sort((a, b) => info.tensors[b][0] - info.tensors[a][0]);
     this.tensorNames =
@@ -157,7 +156,7 @@ class DataLoader extends DataLoaderPolymer {
     let labelIndex = -1;
     this.labelOptions = columnStats.length > 1 ? columnStats.map((stats, i) => {
       // Make the default label by the first non-numeric column.
-      if (!stats.isNumeric && labelIndex == -1) {
+      if (!stats.isNumeric && labelIndex === -1) {
         labelIndex = i;
       }
       return stats.name;
@@ -169,7 +168,7 @@ class DataLoader extends DataLoaderPolymer {
     let standardColorOption: ColorOption[] = [
       {name: 'No color map'},
       // TODO(smilkov): Implement this.
-      //{name: 'Distance of neighbors',
+      // {name: 'Distance of neighbors',
       //    desc: 'How far is each point from its neighbors'}
     ];
     let metadataColorOption: ColorOption[] =
@@ -185,7 +184,7 @@ class DataLoader extends DataLoaderPolymer {
                 // Re-order the range.
                 let newRange = range.map((color, i) => {
                   let index = (i * 2) % (range.length - 1);
-                  if (index == 0) {
+                  if (index === 0) {
                     index = range.length - 1;
                   }
                   return range[index];
@@ -217,12 +216,12 @@ class DataLoader extends DataLoaderPolymer {
 
     // Demo dataset dropdown
     let demoDatasetChanged = (demoDataSet: DemoDataset) => {
-      if (demoDataSet == null) {
+      if (demoDataSet === null || demoDataSet === undefined) {
         return;
       }
 
       this.dom.selectAll('.file-name').style('display', 'none');
-      let separator = demoDataSet.fpath.substr(-3) == 'tsv' ? '\t' : ' ';
+      let separator = demoDataSet.fpath.substr(-3) === 'tsv' ? '\t' : ' ';
       fetchDemoData(`${DATA_URL}/${demoDataSet.fpath}`, separator)
           .then(points => {
 
@@ -391,7 +390,7 @@ function parseTensors(content: string, delim = '\t'): Promise<DataPoint[]> {
     let lines = content.split('\n');
     lines.forEach(line => {
       line = line.trim();
-      if (line == '') {
+      if (line === '') {
         return;
       }
       let row = line.split(delim);
@@ -403,17 +402,17 @@ function parseTensors(content: string, delim = '\t'): Promise<DataPoint[]> {
         projectedPoint: null
       };
       // If the first label is not a number, take it as the label.
-      if (isNaN(row[0] as any) || numDim == row.length - 1) {
+      if (isNaN(row[0] as any) || numDim === row.length - 1) {
         dataPoint.metadata['label'] = row[0];
         dataPoint.vector = row.slice(1).map(Number);
       } else {
         dataPoint.vector = row.map(Number);
       }
       data.push(dataPoint);
-      if (numDim == null) {
+      if (numDim === null || numDim === undefined) {
         numDim = dataPoint.vector.length;
       }
-      if (numDim != dataPoint.vector.length) {
+      if (numDim !== dataPoint.vector.length) {
         updateMessage('Parsing failed. Vector dimensions do not match');
         throw Error('Parsing failed');
       }
@@ -439,10 +438,10 @@ function parseAndMergeMetadata(
     content: string, data: DataPoint[]): Promise<ColumnStats[]> {
   return runAsyncTask('Parsing metadata...', () => {
     let lines = content.split('\n').filter(line => line.trim().length > 0);
-    let hasHeader = (lines.length - 1 == data.length);
+    let hasHeader = (lines.length - 1 === data.length);
 
     // Dimension mismatch.
-    if (lines.length != data.length && !hasHeader) {
+    if (lines.length !== data.length && !hasHeader) {
       throw Error('Dimensions do not match');
     }
 
diff --git a/tensorflow/tensorboard/components/vz-projector/vz-projector.ts b/tensorflow/tensorboard/components/vz-projector/vz-projector.ts
index e8b1a882ec8..cf4492d080c 100644
--- a/tensorflow/tensorboard/components/vz-projector/vz-projector.ts
+++ b/tensorflow/tensorboard/components/vz-projector/vz-projector.ts
@@ -21,7 +21,6 @@ import * as vector from './vector';
 import {ColorOption} from './vz-projector-data-loader';
 import {PolymerElement} from './vz-projector-util';
 
-
 /** T-SNE perplexity. Roughly how many neighbors each point influences. */
 let perplexity: number = 30;
 /** T-SNE learning rate. */
@@ -366,7 +365,7 @@ class Projector extends ProjectorPolymer {
     searchButton.on('click', () => {
       let mode = this.scatter.getMode();
       this.scatter.setMode(mode === Mode.SEARCH ? Mode.HOVER : Mode.SEARCH);
-      if (this.scatter.getMode() == Mode.HOVER) {
+      if (this.scatter.getMode() === Mode.HOVER) {
         this.selectedPoints = [];
         this.selectionWasUpdated();
       } else {
@@ -441,8 +440,8 @@ class Projector extends ProjectorPolymer {
         let pointHighlightColor = modeIsNight ? POINT_HIGHLIGHT_COLOR_NIGHT :
                                                 POINT_HIGHLIGHT_COLOR_DAY;
         this.highlightedPoints = pointIndices.map((index, i) => {
-          let color = i == 0 ? pointHighlightColor :
-                               this.dist2color(neighbors[i - 1].dist, minDist);
+          let color = i === 0 ? pointHighlightColor :
+                                this.dist2color(neighbors[i - 1].dist, minDist);
           return {index: index, color: color};
         });
       }
@@ -486,18 +485,18 @@ class Projector extends ProjectorPolymer {
     if (!selectedPoints.length) {
       this.selectedPoints = [];
       this.updateNNList([]);
-    }
+
     // If only one point is selected, we want to get its nearest neighbors
     // and change the UI accordingly.
-    else if (selectedPoints.length === 1) {
+    } else if (selectedPoints.length === 1) {
       let selectedPoint = selectedPoints[0];
       this.showTab('inspector');
       let neighbors = this.findNeighbors(selectedPoint);
       this.selectedPoints = [selectedPoint].concat(neighbors.map(n => n.index));
       this.updateNNList(neighbors);
-    }
+
     // Otherwise, select all points and hide nearest neighbors list.
-    else {
+    } else {
       this.selectedPoints = selectedPoints as number[];
       this.highlightedPoints = [];
       this.updateNNList([]);
@@ -512,7 +511,7 @@ class Projector extends ProjectorPolymer {
       let x = this.pcaX;
       let y = this.pcaY;
       let z = this.pcaZ;
-      let hasZ = dimension == 3;
+      let hasZ = dimension === 3;
       this.scatter.setXAccessor(i => this.points[i].projections['pca-' + x]);
       this.scatter.setYAccessor(i => this.points[i].projections['pca-' + y]);
       this.scatter.setZAccessor(
@@ -572,6 +571,7 @@ class Projector extends ProjectorPolymer {
     this.scatter.setZAccessor(
         dimension === 3 ? (i => this.points[i].projections['tsne-2']) : null);
     this.scatter.setAxisLabels('tsne-0', 'tsne-1');
+    this.scatter.update();
   }
 
   private runTSNE() {
@@ -594,23 +594,25 @@ class Projector extends ProjectorPolymer {
       let selectedPoint = this.points[this.selectedPoints[0]];
 
       for (let metadataKey in selectedPoint.metadata) {
-        let rowElement = document.createElement('div');
-        rowElement.className = 'ink-panel-metadata-row vz-projector';
+        if (selectedPoint.metadata.hasOwnProperty(metadataKey)) {
+          let rowElement = document.createElement('div');
+          rowElement.className = 'ink-panel-metadata-row vz-projector';
 
-        let keyElement = document.createElement('div');
-        keyElement.className = 'ink-panel-metadata-key vz-projector';
-        keyElement.textContent = metadataKey;
+          let keyElement = document.createElement('div');
+          keyElement.className = 'ink-panel-metadata-key vz-projector';
+          keyElement.textContent = metadataKey;
 
-        let valueElement = document.createElement('div');
-        valueElement.className = 'ink-panel-metadata-value vz-projector';
-        valueElement.textContent = '' + selectedPoint.metadata[metadataKey];
+          let valueElement = document.createElement('div');
+          valueElement.className = 'ink-panel-metadata-value vz-projector';
+          valueElement.textContent = '' + selectedPoint.metadata[metadataKey];
 
-        rowElement.appendChild(keyElement);
-        rowElement.appendChild(valueElement);
+          rowElement.appendChild(keyElement);
+          rowElement.appendChild(valueElement);
 
-        metadataContainerElement.append(function() {
-          return this.appendChild(rowElement);
-        });
+          metadataContainerElement.append(function() {
+            return this.appendChild(rowElement);
+          });
+        }
       }
 
       display = true;
@@ -631,7 +633,7 @@ class Projector extends ProjectorPolymer {
           NN_HIGHLIGHT_COLOR;
     };
     let favor = (i: number) => {
-      return i == 0 || (i < this.highlightedPoints.length ? false : true);
+      return i === 0 || (i < this.highlightedPoints.length ? false : true);
     };
     this.scatter.highlightPoints(allPoints, stroke, favor);
     this.updateIsolateButton();
@@ -667,7 +669,7 @@ class Projector extends ProjectorPolymer {
     let nnlist = this.dom.select('.nn-list');
     nnlist.html('');
 
-    if (neighbors.length == 0) {
+    if (neighbors.length === 0) {
       this.dom.select('#nn-title').text('');
       return;
     }
@@ -727,7 +729,7 @@ class Projector extends ProjectorPolymer {
     if (pattern == null) {
       return {numMatches: 0};
     }
-    if (pattern == '') {
+    if (pattern === '') {
       if (this.allCentroid == null) {
         this.allCentroid =
             vector.centroid(this.points, () => true, accessor).centroid;
@@ -738,7 +740,8 @@ class Projector extends ProjectorPolymer {
     let regExp: RegExp;
     let predicate: (a: DataPoint) => boolean;
     // Check for a regex.
-    if (pattern.charAt(0) == '/' && pattern.charAt(pattern.length - 1) == '/') {
+    if (pattern.charAt(0) === '/' &&
+        pattern.charAt(pattern.length - 1) === '/') {
       pattern = pattern.slice(1, pattern.length - 1);
       try {
         regExp = new RegExp(pattern, 'i');
@@ -749,7 +752,7 @@ class Projector extends ProjectorPolymer {
           (a: DataPoint) => { return regExp.test('' + a.metadata['label']); };
       // else does an exact match
     } else {
-      predicate = (a: DataPoint) => { return a.metadata['label'] == pattern; };
+      predicate = (a: DataPoint) => { return a.metadata['label'] === pattern; };
     }
     return vector.centroid(this.points, predicate, accessor);
   }
diff --git a/tensorflow/tensorboard/components/vz-sorting/sorting.ts b/tensorflow/tensorboard/components/vz-sorting/sorting.ts
index d80f805f450..c1a656c34b8 100644
--- a/tensorflow/tensorboard/components/vz-sorting/sorting.ts
+++ b/tensorflow/tensorboard/components/vz-sorting/sorting.ts
@@ -28,8 +28,12 @@ module VZ.Sorting {
     let ai = 0;
     let bi = 0;
     while (true) {
-      if (ai === a.length) return bi === b.length ? 0 : -1;
-      if (bi === b.length) return 1;
+      if (ai === a.length) {
+        return bi === b.length ? 0 : -1;
+      }
+      if (bi === b.length) {
+        return 1;
+      }
       if (isDigit(a[ai]) && isDigit(b[bi])) {
         const ais = ai;
         const bis = bi;
@@ -37,12 +41,18 @@ module VZ.Sorting {
         bi = consumeNumber(b, bi + 1);
         const an = parseFloat(a.slice(ais, ai));
         const bn = parseFloat(b.slice(bis, bi));
-        if (an < bn) return -1;
-        if (an > bn) return 1;
+        if (an < bn) {
+          return -1;
+        }
+        if (an > bn) {
+          return 1;
+        }
         continue;
       }
       if (isBreak(a[ai])) {
-        if (!isBreak(b[bi])) return -1;
+        if (!isBreak(b[bi])) {
+          return -1;
+        }
       } else if (isBreak(b[bi])) {
         return 1;
       } else if (a[ai] < b[bi]) {
@@ -80,7 +90,9 @@ module VZ.Sorting {
           break;
         }
       } else if (state === State.EXPONENT) {
-        if (!isDigit(s[i])) break;
+        if (!isDigit(s[i])) {
+          break;
+        }
       }
     }
     return i;
diff --git a/tensorflow/tensorboard/dist/index.html b/tensorflow/tensorboard/dist/index.html
index 38f3ab14484..5931a3f2f26 100644
--- a/tensorflow/tensorboard/dist/index.html
+++ b/tensorflow/tensorboard/dist/index.html
@@ -19,45 +19,10 @@ limitations under the License.
 <html>
   <head>
     <title>TensorBoard</title>
-
-    <script src="external/webcomponentsjs/webcomponents-lite.min.js"></script>
+    <script src="webcomponentsjs/webcomponents-lite.min.js"></script>
     <link rel="stylesheet" type="text/css" href="lib/css/global.css">
     <link rel="shortcut icon" href="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAMQAAADECAMAAAD3eH5ZAAAABGdBTUEAALGPC/xhBQAAAAFzUkdCAK7OHOkAAAD/UExURfFlKfaELvFmKfNyK/67NvWALf68Nv69NvNxK/20NfyyNP22NfN0K/JrKvqhMv2zNf25Nf24Nf23NfeOL/yzNPyvNPJoKviWMPmeMfN1K/WBLfePL/FnKfeML/qlMvR7LPmcMfeLL/aJLvR5LPFoKfJuKvR3LP66NvywNPeNL/V/LfaILv21Nf26NfNzK/NvK/R6LPmaMfyxNPqfMvV+LfurM/iSMPmbMfJvKvmdMfumM/qiMvmZMfytNPJqKvysNPN2K/iYMPNwK/upM/JtKvJsKviVMPaHLvaGLvJpKvR8LPaKLvqkMvuqM/aFLvR4LPuoM/iTMPWDLfiRMPmYMXS0ngkAAALoSURBVHja7drnctpAFIbhFUISSKJ3MKYa0+y4xTW9937/15JkJhlTjhrSrHRmvuf/as6L0YLFCgEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMBJ6njenqspzgnPrsrGX9Zpi2tCrmnc6+dYNthVY5WpMmxQLWPdMsOuYVwzNj3ei2t3mQwaV43BJPDCS2NbJ5aEeuX/+9qcjQOtfFIkIkrvY2g4MVcmOBsFWbowKO/kNyj62gRpJcDaPBlxLr1B0zdG0C/8LzbJiJrshuvy1gzlA9+rD8mIkuyIJjFE3/dqnYwoSm7IUEPoD/wut8iIguSIDjlFxe/yfXL5vuSI21BTZLLhXoOILMO8Hxwa/L8bI0LfmUdhGowb2ZvT0e57pFNDgB06IlVyjmmIBl2T/nl9Rw6SD9GgSG/Q0uQkaW3XhmovKQ3eFQ4N2Uo9OQ1eFZsNerf7vP+rO4rhmY1Lg3vFVoP8+8BXg1sFnwbnCk4NThW8GuiKBDdkVVtTNFvNelVsNqTbyWnIOM2oeTRoyWvwmpJHg/ucXBrcJuXT4DwrpwZi2vy0VCx8YtXg/D2bU4OfiuQ3eFfE2KD4bfCqiLNB993gXsGlwa2CT4NzBacGIVQ6YsipQdh0xEdODUKjIxrSp88onZ8zbbFLg1DoiFO5BXvDGv2My9/JhUT8JUZTI0yDaNHLBzIbvqTDNYhUiVw/kdjQ1kM2CHFDPjKW+KzyRTF0g/ga9w9y+fANQpxvX8CU+Ny7FUWDeF3Y+g3lROIf4k0UDX9eCyvO531PyYhHga9zvPZJU5b73Y/eXj8Hv9D48n6HaF5LbcjRt8TZTtda5M1DfXnbkX1C0SHCFKzQB5Fe8op4GNGNHavvZESbVwT5r6W1xyuCPBY3Y9YgDqzknH/e3YfNzzuL30l0IebrZ5kKtuDIXt1n868ET6kf3/49tLvrCcZyF8Pu215dAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAcPIbNrBhOaBXucoAAAAASUVORK5CYII=">
-
-    <script src="external/lodash/lodash.min.js"></script>
-    <script src="external/d3/d3.min.js"></script>
-    <script src="external/plottable/plottable.min.js"></script>
-    <link rel="stylesheet" type="text/css" href="external/plottable/plottable.css">
-
-    <script src="external/graphlib/dist/graphlib.core.min.js"></script>
-    <script src="external/dagre/dist/dagre.core.min.js"></script>
-
-    <link rel="import" href="external/polymer/polymer.html">
-
-    <link rel="import" href="external/iron-ajax/iron-ajax.html">
-    <link rel="import" href="external/iron-collapse/iron-collapse.html">
-    <link rel="import" href="external/iron-icons/iron-icons.html">
-    <link rel="import" href="external/iron-list/iron-list.html">
-    <link rel="import" href="external/paper-button/paper-button.html">
-    <link rel="import" href="external/paper-checkbox/paper-checkbox.html">
-    <link rel="import" href="external/paper-dialog/paper-dialog.html">
-    <link rel="import" href="external/paper-dropdown-menu/paper-dropdown-menu.html">
-    <link rel="import" href="external/paper-header-panel/paper-header-panel.html">
-    <link rel="import" href="external/paper-icon-button/paper-icon-button.html">
-    <link rel="import" href="external/paper-input/paper-input.html">
-    <link rel="import" href="external/paper-item/paper-item.html">
-    <link rel="import" href="external/paper-menu/paper-menu.html">
-    <link rel="import" href="external/paper-progress/paper-progress.html">
-    <link rel="import" href="external/paper-radio-button/paper-radio-button.html">
-    <link rel="import" href="external/paper-radio-group/paper-radio-group.html">
-    <link rel="import" href="external/paper-slider/paper-slider.html">
-    <link rel="import" href="external/paper-styles/paper-styles.html">
-    <link rel="import" href="external/paper-toggle-button/paper-toggle-button.html">
-    <link rel="import" href="external/paper-toolbar/paper-toolbar.html">
-    <link rel="import" href="external/paper-tabs/paper-tabs.html">
     <link rel="import" href="dist/tf-tensorboard.html">
-
-
   </head>
   <body>
     <tf-tensorboard></tf-tensorboard>
diff --git a/tensorflow/tensorboard/dist/tf-tensorboard.html b/tensorflow/tensorboard/dist/tf-tensorboard.html
index fda832bfda5..49117764333 100644
--- a/tensorflow/tensorboard/dist/tf-tensorboard.html
+++ b/tensorflow/tensorboard/dist/tf-tensorboard.html
@@ -32,7 +32,15 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
---><meta charset="UTF-8">
+--><meta charset="UTF-8"><link rel="import" href="../polymer/polymer.html">
+<link rel="import" href="../iron-icons/iron-icons.html">
+<link rel="import" href="../paper-tabs/paper-tabs.html">
+<link rel="import" href="../paper-dialog/paper-dialog.html">
+<link rel="import" href="../paper-checkbox/paper-checkbox.html">
+<link rel="import" href="../paper-toolbar/paper-toolbar.html">
+<link rel="import" href="../paper-button/paper-button.html">
+<link rel="import" href="../paper-header-panel/paper-header-panel.html">
+
 <script>/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -126,6 +134,9 @@ var TF;
 </script>
 </dom-module>
 
+<script src="../lodash/lodash.min.js"></script>
+<link rel="import" href="../paper-slider/paper-slider.html">
+<link rel="import" href="../paper-input/paper-input.html">
 
 <dom-module id="tf-smoothing-input" assetpath="../tf-event-dashboard/">
   <template>
@@ -222,6 +233,7 @@ var TF;
     });
   </script>
 </dom-module>
+<script src="../d3/d3.js"></script>
 <script>/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -1249,6 +1261,826 @@ var TF;
     })();
   </script>
 </dom-module>
+<link rel="import" href="../paper-styles/paper-styles.html">
+
+<dom-module id="scrollbar-style" assetpath="../tf-dashboard-common/">
+  <template>
+    <style>
+      .scrollbar::-webkit-scrollbar-track
+      {
+        visibility: hidden;
+      }
+
+      .scrollbar::-webkit-scrollbar
+      {
+        width: 10px;
+      }
+
+      .scrollbar::-webkit-scrollbar-thumb
+      {
+        border-radius: 10px;
+        -webkit-box-shadow: inset 0 0 2px rgba(0,0,0,.3);
+        background-color: var(--paper-grey-500);
+        color: var(--paper-grey-900);
+      }
+      .scrollbar {
+        box-sizing: border-box;
+      }
+    </style>
+  </template>
+</dom-module>
+<style is="custom-style">
+
+  :root {
+    --tb-orange-weak: #ffa726;
+    --tb-orange-strong: #f57c00;
+    --tb-grey-darker: #e2e2e2;
+    --tb-grey-lighter: #f3f3f3;
+    --tb-ui-dark-accent: #757575;
+    --tb-ui-light-accent: #e0e0e0;
+    --tb-graph-faded: #e0d4b3;
+  }
+
+</style>
+
+<dom-module id="tf-dashboard-layout" assetpath="../tf-dashboard-common/">
+  <template>
+    <div id="sidebar">
+      <content select=".sidebar"></content>
+    </div>
+
+    <div id="center" class="scrollbar">
+      <content select=".center"></content>
+    </div>
+    <style include="scrollbar-style"></style>
+    <style>
+      #sidebar {
+        width: inherit;
+        height: 100%;
+        overflow: ellipsis;
+        flex-grow: 0;
+        flex-shrink: 0;
+      }
+
+      #center {
+        height: 100%;
+        overflow-y: auto;
+        flex-grow: 1;
+        flex-shrink: 1;
+      }
+
+      .tf-graph-dashboard #center {
+        background: white;
+      }
+
+      :host {
+        display: flex;
+        flex-direction: row;
+        height: 100%;
+      }
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-dashboard-layout",
+    });
+  </script>
+</dom-module>
+<dom-module id="dashboard-style" assetpath="../tf-dashboard-common/">
+  <template>
+    <style>
+      .sidebar {
+        display: flex;
+        flex-direction: column;
+        height: 100%;
+        margin-right: 20px;
+      }
+
+      .sidebar-section {
+        border-top: solid 1px rgba(0, 0, 0, 0.12);
+        padding: 20px 0px 20px 30px;
+      }
+
+      .sidebar-section:first-child {
+        border: none;
+      }
+
+      .sidebar-section:last-child {
+        flex-grow: 1;
+        display: flex;
+      }
+
+      paper-checkbox {
+        --paper-checkbox-checked-color: var(--tb-ui-dark-accent);
+        --paper-checkbox-unchecked-color: var(--tb-ui-dark-accent);
+        font-size: 14px;
+        margin-top: 5px;
+      }
+    </style>
+  </template>
+</dom-module>
+<link rel="import" href="../paper-dropdown-menu/paper-dropdown-menu.html">
+<link rel="import" href="../paper-menu/paper-menu.html">
+<link rel="import" href="../paper-item/paper-item.html">
+
+<dom-module id="tf-downloader" assetpath="../tf-dashboard-common/">
+  <template>
+    <paper-dropdown-menu no-label-float="true" label="run to download" selected-item-label="{{_run}}">
+      <paper-menu class="dropdown-content">
+        <template is="dom-repeat" items="[[runs]]">
+          <paper-item no-label-float="true">[[item]]</paper-item>
+        </template>
+      </paper-menu>
+    </paper-dropdown-menu>
+    <div class="center">
+      <span>
+        <a download="[[_csvName(_run)]]" href="[[_csvUrl(_run, urlFn)]]">CSV</a>
+        <a download="[[_jsonName(_run)]]" href="[[_jsonUrl(_run, urlFn)]]">JSON</a>
+      </span>
+    </div>
+    <style>
+      :host {
+        display: flex;
+        height: 32px;
+      }
+      .center {
+        display: flex;
+        align-self: center;
+      }
+      paper-dropdown-menu {
+        width: 100px;
+        --paper-input-container-label: {
+          font-size: 10px;
+        }
+        --paper-input-container-input: {
+          font-size: 10px;
+        }
+      }
+      a {
+        font-size: 10px;
+        border-radius: 3px;
+        border: 1px solid #EEE;
+      }
+      paper-input {
+        font-size: 22px;
+      }
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-downloader",
+      properties: {
+        _run: String,
+        runs: Array,
+        tag: String,
+        urlFn: Function,
+      },
+      _csvUrl: function(_run, urlFn) {
+        return urlFn(this.tag, _run) + "&format=csv";
+      },
+      _jsonUrl: function(_run, urlFn) {
+        return urlFn(this.tag, _run);
+      },
+      _csvName: function(_run) {
+        return "run_" + _run + ",tag_" + this.tag + ".csv";
+      },
+      _jsonName: function(_run) {
+        return "run-" + _run + "-tag-" + this.tag + ".json";
+      },
+    });
+  </script>
+</dom-module>
+
+<dom-module id="tf-no-data-warning" assetpath="../tf-dashboard-common/">
+  <template>
+    <template is="dom-if" if="[[showWarning]]">
+      <div class="warning">
+        <template is="dom-if" if="[[graphMode]]">
+          <h3>
+            No graph definition files were found.
+          </h3>
+          <p>
+            To store a graph, create a
+            <code>tf.train.SummaryWriter</code>
+            and pass the graph either via the constructor, or by calling its
+            <code>add_graph()</code> method.
+            You may want to check out the
+            <a href="https://www.tensorflow.org/versions/master/how_tos/graph_viz/index.html">
+              graph visualizer tutorial
+            </a>
+            .
+          </p>
+        </template>
+        <template is="dom-if" if="[[!graphMode]]">
+          <h3>
+            No <span>[[dataType]]</span> data was found.
+          </h3>
+          <p>
+            Probable causes:
+            </p><ul>
+              <li>
+                You haven't written any <span>[[dataType]]</span> data
+                to your event files.
+              </li>
+              <li>
+                TensorBoard can't find your event files.
+              </li>
+            </ul>
+          <p></p>
+        </template>
+        <p>
+          If you're new to using TensorBoard, and want to find out how to add
+          data and set up your event files, check out the
+          <a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md">
+            README
+          </a>
+          and perhaps the
+          <a href="https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html">
+            TensorBoard tutorial
+          </a>.
+        </p>
+
+        <p>
+          If you think TensorBoard is configured properly, please see the
+          <a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md#my-tensorboard-isnt-showing-any-data-whats-wrong">
+            section of the README devoted to missing data problems
+          </a>
+          and consider filing an issue on GitHub.
+        </p>
+
+      </div>
+    </template>
+    <style>
+      .warning {
+        max-width: 540px;
+        margin: 80px auto 0 auto;
+      }
+    </style>
+  </template>
+
+  <script>
+    Polymer({
+      is: "tf-no-data-warning",
+      properties: {
+        dataType: String,
+        showWarning: Boolean,
+        graphMode: {type: Boolean, computed: "_isGraph(dataType)"},
+      },
+      _isGraph: function(dataType) {
+        return dataType === "graph";
+      },
+    });
+  </script>
+</dom-module>
+<script>/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+var TF;
+(function (TF) {
+    var Dashboard;
+    (function (Dashboard) {
+        /**
+         * ReloadBehavior: A simple behavior for dashboards where the
+         * frontendReload() function should find every child element with a
+         * given tag name (e.g. "tf-line-chart" or "tf-image-loader")
+         * and call a `reload` method on that child.
+         * May later extend it so it has more sophisticated logic, e.g. reloading
+         * only tags that are in view.
+         */
+        function ReloadBehavior(tagName) {
+            return {
+                properties: {
+                    reloadTag: {
+                        type: String,
+                        value: tagName,
+                    },
+                },
+                frontendReload: function () {
+                    var elements = this.getElementsByTagName(this.reloadTag);
+                    Array.prototype.forEach.call(elements, function (x) { x.reload(); });
+                },
+            };
+        }
+        Dashboard.ReloadBehavior = ReloadBehavior;
+    })(Dashboard = TF.Dashboard || (TF.Dashboard = {}));
+})(TF || (TF = {}));
+</script>
+
+<dom-module id="tf-option-selector" assetpath="../tf-dashboard-common/">
+  <template>
+    <div id="wrap">
+      <h3>[[name]]</h3>
+      <div class="content-wrapper"><content></content></div>
+    </div>
+    <style>
+      .content-wrapper ::content > * {
+        width: 30%;
+        font-size: 13px;
+        background: none;
+        margin-top: 10px;
+        color: var(--tb-ui-dark-accent);
+      }
+
+      .content-wrapper ::content :first-of-type {
+        margin-left: 0;
+      }
+
+      .content-wrapper ::content .selected {
+        background-color: var(--tb-ui-dark-accent);
+        color: white!important;
+      }
+
+      h3 {
+        color: var(--paper-grey-800);
+        margin: 0;
+        font-weight: normal;
+        font-size: 14px;
+        margin-bottom: 5px;
+        display: block;
+        pointer-events: none;
+      }
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-option-selector",
+      properties: {
+        name: String,
+        selectedId: {
+          type: String,
+          notify: true,
+          observer: '_selectedIdChanged'
+        }
+      },
+      attached: function() {
+        this.async(function() {
+          this.getEffectiveChildren().forEach(function(node) {
+            this.listen(node, 'tap', '_selectTarget');
+          }.bind(this));
+        });
+      },
+      _selectTarget: function(e) {
+        this.selectedId = e.currentTarget.id;
+      },
+      _selectedIdChanged: function() {
+        var selected = this.queryEffectiveChildren('#' + this.selectedId);
+        if (!selected) {
+          return;
+        }
+
+        this.getEffectiveChildren().forEach(function(node) {
+          node.classList.remove("selected");
+        });
+        selected.classList.add("selected");
+      }
+    });
+  </script>
+</dom-module>
+<link rel="import" href="../iron-collapse/iron-collapse.html">
+
+<dom-module id="tf-collapsable-pane" assetpath="../tf-dashboard-common/">
+  <template>
+    <button class="heading" on-tap="togglePane" open-button$="[[opened]]">
+    <span class="name">[[name]]</span>
+    <span class="count">
+      <span>[[count]]</span>
+    </span>
+  </button>
+    <iron-collapse opened="[[opened]]">
+      <div class="content">
+        <template is="dom-if" if="[[opened]]" restamp="[[restamp]]">
+          <content></content>
+        </template>
+      </div>
+    </iron-collapse>
+    <style>
+      :host {
+        display: block;
+        margin: 0 5px 1px 10px;
+      }
+
+      :host:first-of-type {
+        margin-top: 20px;
+      }
+      
+      :host:last-of-type {
+        margin-bottom: 20px;
+      }
+
+      .heading {
+        background-color: white;
+        border: none;
+        cursor: pointer;
+        width: 100%;
+        font-size: 15px;
+        line-height: 1;
+        box-shadow: 0 1px 5px rgba(0,0,0,0.2);
+        padding: 10px 15px;
+      }
+
+      .content {
+        padding: 15px;
+        border: 1px solid #dedede;
+        border-top: none;
+        border-bottom-left-radius: 2px;
+        border-bottom-right-radius: 2px;
+        background: white;
+      }
+
+      [open-button] {
+        border-bottom-left-radius: 0px !important;
+        border-bottom-right-radius: 0px !important;
+      }
+
+      .name {
+        float: left;
+      }
+
+      .count {
+        float: right;
+        margin-right: 5px;
+        font-size: 12px;
+        color: var(--paper-grey-500);
+      }
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-collapsable-pane",
+      properties: {
+        opened: {type: Boolean, value: false},
+        restamp: {type: Boolean, value: true},
+        name: {type: String, observer: "hide"},
+        count: {type: Number},
+      },
+      hide: function() {
+        this.opened = false;
+      },
+      togglePane: function() {
+        this.opened = !this.opened;
+      }
+    });
+  </script>
+
+</dom-module>
+<script src="../plottable/plottable.js"></script>
+<link rel="stylesheet" type="text/css" href="../plottable/plottable.css">
+
+<dom-module id="tf-chart-scaffold" assetpath="../tf-dashboard-common/">
+  <template>
+    <content></content>
+    <style>
+      :host {
+        -webkit-user-select: none;
+        -moz-user-select: none;
+        display: flex;
+        flex-direction: column;
+        flex-grow: 1;
+        flex-shrink: 1;
+        position: relative;
+      }
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-chart-scaffold",
+      properties: {
+        tag: String,
+        dataProvider: Function,
+        visibleSeries: Array,
+        _attached: {
+          type: Boolean,
+          value: false
+        }
+      },
+      observers: [
+        "reload(tag, dataProvider)",
+        "_changeSeries(visibleSeries.*)"
+      ],
+      ready: function() {
+        this.fire('ready');
+      },
+      attached: function() {
+        this._attached = true;
+        this._changeSeries();
+      },
+      detached: function() {
+        this._attached = false;
+      },
+      reload: function() {
+        // Refetches every visible series for the current tag. A no-op until
+        // attached; throws when dataProvider or tag is missing.
+        if (!this._attached) { return; }
+        if (!this.dataProvider) {
+          throw new Error('tf-chart-scaffold requires a dataProvider.');
+        }
+        if (!this.tag) { throw new Error('tf-chart-scaffold requires a tag.'); }
+        var tag = this.tag;  // Snapshot so late responses can be recognised.
+        this.visibleSeries.forEach(function(name) {
+          this.dataProvider(tag, name).then(function(data) {
+            // Ignore responses that land after a detach or a tag change.
+            if (!this._attached || this.tag !== tag) { return; }
+            this.chart().setSeriesData(name, data);
+          }.bind(this));
+        }.bind(this));
+      },
+      _changeSeries: function() {
+        if (!this._attached) {
+           return;
+        }
+        else if (!this.visibleSeries) {
+          throw new Error('tf-chart-scaffold requires a visibleSeries.');
+        }
+
+        this.chart().setVisibleSeries(this.visibleSeries);
+        this.reload();
+      },
+      chart: function() {  // Returns the first effective child after validating it.
+        var children = this.getEffectiveChildren();
+        if (!children.length) {
+          throw new Error('tf-chart-scaffold has no children');
+        }
+
+        var child = children[0];  // Only the first child is treated as the chart.
+        if (!child.setVisibleSeries || !child.setSeriesData) {  // The two methods this element calls on it.
+          throw new Error("tf-chart-scaffold's content doesn't implement the " +
+              "required interface");
+        }
+        return child;
+      }
+    });
+  </script>
+</dom-module>
+
+<dom-module id="tf-panes-helper" assetpath="../tf-dashboard-common/">
+  <template>
+    <content></content> 
+    <tf-no-data-warning data-type="[[dataType]]" show-warning="[[dataNotFound]]"></tf-no-data-warning>
+
+    <template is="dom-repeat" items="[[categories]]" as="category">
+      <tf-collapsable-pane name="[[category.name]]" count="[[_count(category.tags, selectedRuns.*)]]">
+        <div class="layout horizontal wrap">
+          <template is="dom-repeat" items="[[_categoryCards(category, selectedRuns.*, run2tag.*)]]">
+              <div class="card">
+                <span class="card-title">[[item.tag]]</span>
+                <template is="dom-if" if="[[repeatForRuns]]">
+                  <span class="card-subtitle">[[item.run]]</span>
+                </template>
+                <div class="card-content">
+                  <tf-chart-scaffold tag="[[item.tag]]" data-provider="[[dataProvider]]" visible-series="[[item.runs]]" on-ready="_instantiateTemplate">
+                    
+                  </tf-chart-scaffold>
+                </div>
+                <div class="card-bottom-row">
+                  <paper-icon-button class="expand-button" icon="fullscreen" on-tap="_toggleExpanded"></paper-icon-button>
+                  <template is="dom-if" if="[[showDownloadLinks]]">
+                    <tf-downloader runs="[[item.runs]]" tag="[[item.tag]]" url-fn="[[downloadLinkUrlFunction]]">
+                    </tf-downloader>
+                  </template>
+                </div>
+              </div>
+          </template>
+        </div>
+      </tf-collapsable-pane>
+    </template>
+
+    <style>
+      .card {
+        height: var(--card-height, 200px);
+        width: var(--card-width, 300px);
+        display: flex;
+        flex-direction: column;
+        margin: 5px;
+        padding: 0 30px 30px 0;
+        -webkit-user-select: none;
+        -moz-user-select: none;
+        position: relative;
+      }
+
+      .card-expanded {
+        height: var(--card-expanded-height, 400px);
+        width: var(--card-expanded-width, 100%);
+      }
+
+      .card-title, .card-subtitle {
+        flex-grow: 0;
+        flex-shrink: 0;
+        font-size: 14px;
+        text-overflow: ellipsis;
+        overflow: hidden;
+      }
+
+      .card-subtitle {
+        font-size: 12px;
+      }
+
+      .card-content {
+        flex-grow: 1;
+        flex-shrink: 1;
+        display: flex;
+        margin-top: 10px;
+      }
+
+      .card-bottom-row {
+        position: absolute;
+        left: 0px;
+        bottom: 0px;
+        width: 100%;
+        display: flex;
+        flex-direction: row;
+        justify-content: space-between;
+        pointer-events: none;
+      }
+
+      .expand-button {
+        color: #2196F3;
+        width: 32px;
+        height: 32px;
+        padding: 4px;
+        border-radius: 100%;
+        pointer-events: auto;
+      }
+
+      .card-expanded .expand-button {
+        background: var(--tb-ui-light-accent);
+      }
+
+      tf-downloader {
+        margin-right: 30px;
+        pointer-events: auto;
+      }
+
+    </style>
+  </template>
+  <script>
+    Polymer({
+      is: "tf-panes-helper",
+      properties: {
+        /**
+         * Categories that separate the template instances. Each category will
+         * be given its own collapsible pane. The category must be an array of
+         * objects, each with a 'name' property and a 'tags' array of strings.
+         */
+        categories: Array,
+
+        /**
+         * The name of the data type that is used by this dashboard. This will
+         * be used to display what is missing when there is no data available.
+         */
+        dataType: String,
+
+        /**
+         * The function that requests and returns a promise with the data of the
+         * required type for the templates from the backend.
+         */
+        dataProvider: Object,
+
+        /**
+         * If false, instantiates one template for each tag and calls
+         * setVisibleSeries on the first element of the template with all valid
+         * runs the tag has. If true, instantiates one template for each run of
+         * each tag, and calls setVisibleSeries of the first element of the
+         * instantiated template with just the one run.
+         */
+        repeatForRuns: {
+          type: Boolean,
+          value: false
+        },
+
+        /**
+         * Map from runs to the valid tags that have them.
+         */
+        run2tag: Object,
+
+        /**
+         * Array with the runs that are selected by the user (i.e. valid to be
+         * displayed).
+         */
+        selectedRuns: Array,
+
+        /**
+         * If true, shows a menu with download links for the template data.
+         * If this is set to true, urlFn must also be provided.
+         */
+        showDownloadLinks: Boolean,
+
+        /**
+         * Function that returns the route to get data to download. Must be
+         * provided if showDownloadLinks is enabled.
+         */
+        downloadLinkUrlFunction: Function,
+        _contentTemplate: {
+          type: Object,
+          value: null
+        },
+        _stampedTemplates: {
+          type: Array,
+          value: function() { return [] }
+        }
+      },
+      behaviors: [
+        Polymer.Templatizer,
+      ],
+
+      /**
+       * Initializes the Polymer.Templatizer behavior with the template supplied
+       * by the user. With this, all calls to this.stamp() will produce an
+       * instance of the user template.
+       */
+      _initTemplatizer: function() {
+        if (!this._contentTemplate) {
+          // First template is used as the content.
+          this._contentTemplate = Polymer.dom(this).querySelector('template');
+          this.templatize(this._contentTemplate);
+        }
+      },
+
+      /**
+       * Called every time a tf-chart-scaffold is ready, stamps the user
+       * template inside the scaffold element (before it is attached) and
+       * stores the stamped template in an array to use for data binding
+       * (forwardParentProp/Path).
+       */
+      _instantiateTemplate: function(e) {
+        var scaffold = e.target;  // The element whose 'ready' event invoked this handler.
+        this._initTemplatizer();  // Templatizes the user template on first use only.
+        var instance = this.stamp();
+        this._stampedTemplates.push(instance);  // NOTE(review): nothing visible here removes entries; confirm it cannot grow unbounded.
+        Polymer.dom(scaffold).appendChild(instance.root);
+      },
+      _toggleExpanded: function(e) {
+        var currentTarget = Polymer.dom(e.currentTarget);
+        var card = currentTarget.node.closest('.card');
+        var scaffold = card.querySelector('tf-chart-scaffold');
+        card.classList.toggle('card-expanded');
+        scaffold.chart().redraw();
+      },
+      _count: function(tags) {
+        if (!this.repeatForRuns) {
+          return tags.length;
+        }
+        // One card per (run, tag) pair: count the category's tags in each run.
+        var targetTags = d3.set(tags);
+        var count = 0;
+        this.selectedRuns.forEach(function(r) {
+          // Tolerate runs absent from run2tag, as _categoryCards does.
+          var runTags = (this.run2tag && this.run2tag[r]) || [];
+          runTags.forEach(function(t) {
+            if (targetTags.has(t)) { count++; }
+          });
+        }.bind(this));
+        return count;
+      },
+      _categoryCards: function(category) {
+        var cards = [];
+        category.tags.forEach(function(tag) {
+          var runs = this.selectedRuns.filter(function(r) {
+            return this.run2tag[r] && this.run2tag[r].indexOf(tag) !== -1;
+          }.bind(this));
+
+          if (this.repeatForRuns) {
+            runs.forEach(function(run) {
+              cards.push({tag: tag, run: run, runs: [run]});
+            });
+          } else {
+            cards.push({tag: tag, runs: runs});
+          }
+        }.bind(this));
+
+        return cards;
+      },
+
+      /*
+       * Polymer data binding forwarding functions. Check the
+       * Polymer.Templatizer documentation for more information.
+       */
+
+      _forwardParentProp: function(property, value) {
+        this._stampedTemplates.forEach(function(instance) {
+          instance[property] = value;
+        });
+      },
+      _forwardParentPath: function(path, value) {
+        this._stampedTemplates.forEach(function(instance) {
+          instance.notifyPath(path, value, true);
+        });
+      },
+      // TODO(renatoutsch): implement the instance forwarding for two-way data
+      // binding.
+    });
+  </script>
+</dom-module>
 <dom-module id="tf-storage" assetpath="../tf-storage/">
  <script>/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 
@@ -1570,6 +2402,7 @@ var TF;
 })(TF || (TF = {}));
 </script>
 </dom-module>
+<link rel="import" href="../paper-icon-button/paper-icon-button.html">
 
 <dom-module id="tf-regex-group" assetpath="../tf-dashboard-common/">
   <template>
@@ -1683,19 +2516,7 @@ var TF;
     });
   </script>
 </dom-module>
-<style is="custom-style">
-
-  :root {
-    --tb-orange-weak: #ffa726;
-    --tb-orange-strong: #f57c00;
-    --tb-grey-darker: #e2e2e2;
-    --tb-grey-lighter: #f3f3f3;
-    --tb-ui-dark-accent: #757575;
-    --tb-ui-light-accent: #e0e0e0;
-    --tb-graph-faded: #e0d4b3;
-  }
-
-</style>
+<link rel="import" href="../paper-toggle-button/paper-toggle-button.html">
 
 <dom-module id="tf-categorizer" assetpath="../tf-dashboard-common/">
   <template>
@@ -1880,580 +2701,6 @@ var Categorizer;
     });
   </script>
 </dom-module>
-
-<dom-module id="tf-chart-scaffold" assetpath="../tf-dashboard-common/">
-  <template>
-    <content></content>
-    <style>
-      :host {
-        -webkit-user-select: none;
-        -moz-user-select: none;
-        display: flex;
-        flex-direction: column;
-        flex-grow: 1;
-        flex-shrink: 1;
-        position: relative;
-      }
-    </style>
-  </template>
-  <script>
-    Polymer({
-      is: "tf-chart-scaffold",
-      properties: {
-        tag: String,
-        dataProvider: Function,
-        visibleSeries: Array,
-        _attached: {
-          type: Boolean,
-          value: false
-        }
-      },
-      observers: [
-        "reload(tag, dataProvider)",
-        "_changeSeries(visibleSeries.*)"
-      ],
-      ready: function() {
-        this.fire('ready');
-      },
-      attached: function() {
-        this._attached = true;
-        this._changeSeries();
-      },
-      detached: function() {
-        this._attached = false;
-      },
-      reload: function() {
-        if (!this._attached) {
-          return;
-        }
-        else if (!this.dataProvider) {
-          throw new Error('tf-chart-scaffold requires a dataProvider.');
-        }
-        else if (!this.tag) {
-          throw new Error('tf-chart-scaffold requires a tag.');
-        }
-
-        this.visibleSeries.forEach(function(name) {
-          this.dataProvider(this.tag, name).then(function(data) {
-            this.chart().setSeriesData(name, data);
-          }.bind(this));
-        }.bind(this));
-      },
-      _changeSeries: function() {
-        if (!this._attached) {
-           return;
-        }
-        else if (!this.visibleSeries) {
-          throw new Error('tf-chart-scaffold requires a visibleSeries.');
-        }
-
-        this.chart().setVisibleSeries(this.visibleSeries);
-        this.reload();
-      },
-      chart: function() {
-        var children = this.getEffectiveChildren();
-        if (!children.length) {
-          throw new Error('tf-chart-scaffold has no children');
-        }
-
-        var child = children[0];
-        if (!child.setVisibleSeries || !child.setSeriesData) {
-          throw new Error("tf-chart-scaffold's content doesn't implement the " +
-              "required interface");
-        }
-        return child;
-      }
-    });
-  </script>
-</dom-module>
-
-<dom-module id="tf-collapsable-pane" assetpath="../tf-dashboard-common/">
-  <template>
-    <button class="heading" on-tap="togglePane" open-button$="[[opened]]">
-    <span class="name">[[name]]</span>
-    <span class="count">
-      <span>[[count]]</span>
-    </span>
-  </button>
-    <iron-collapse opened="[[opened]]">
-      <div class="content">
-        <template is="dom-if" if="[[opened]]" restamp="[[restamp]]">
-          <content></content>
-        </template>
-      </div>
-    </iron-collapse>
-    <style>
-      :host {
-        display: block;
-        margin: 0 5px 1px 10px;
-      }
-
-      :host:first-of-type {
-        margin-top: 20px;
-      }
-      
-      :host:last-of-type {
-        margin-bottom: 20px;
-      }
-
-      .heading {
-        background-color: white;
-        border: none;
-        cursor: pointer;
-        width: 100%;
-        font-size: 15px;
-        line-height: 1;
-        box-shadow: 0 1px 5px rgba(0,0,0,0.2);
-        padding: 10px 15px;
-      }
-
-      .content {
-        padding: 15px;
-        border: 1px solid #dedede;
-        border-top: none;
-        border-bottom-left-radius: 2px;
-        border-bottom-right-radius: 2px;
-        background: white;
-      }
-
-      [open-button] {
-        border-bottom-left-radius: 0px !important;
-        border-bottom-right-radius: 0px !important;
-      }
-
-      .name {
-        float: left;
-      }
-
-      .count {
-        float: right;
-        margin-right: 5px;
-        font-size: 12px;
-        color: var(--paper-grey-500);
-      }
-    </style>
-  </template>
-  <script>
-    Polymer({
-      is: "tf-collapsable-pane",
-      properties: {
-        opened: {type: Boolean, value: false},
-        restamp: {type: Boolean, value: true},
-        name: {type: String, observer: "hide"},
-        count: {type: Number},
-      },
-      hide: function() {
-        this.opened = false;
-      },
-      togglePane: function() {
-        this.opened = !this.opened;
-      }
-    });
-  </script>
-
-</dom-module>
-
-<dom-module id="scrollbar-style" assetpath="../tf-dashboard-common/">
-  <template>
-    <style>
-      .scrollbar::-webkit-scrollbar-track
-      {
-        visibility: hidden;
-      }
-
-      .scrollbar::-webkit-scrollbar
-      {
-        width: 10px;
-      }
-
-      .scrollbar::-webkit-scrollbar-thumb
-      {
-        border-radius: 10px;
-        -webkit-box-shadow: inset 0 0 2px rgba(0,0,0,.3);
-        background-color: var(--paper-grey-500);
-        color: var(--paper-grey-900);
-      }
-      .scrollbar {
-        box-sizing: border-box;
-      }
-    </style>
-  </template>
-</dom-module>
-
-<dom-module id="tf-dashboard-layout" assetpath="../tf-dashboard-common/">
-  <template>
-    <div id="sidebar">
-      <content select=".sidebar"></content>
-    </div>
-
-    <div id="center" class="scrollbar">
-      <content select=".center"></content>
-    </div>
-    <style include="scrollbar-style"></style>
-    <style>
-      #sidebar {
-        width: inherit;
-        height: 100%;
-        overflow: ellipsis;
-        flex-grow: 0;
-        flex-shrink: 0;
-      }
-
-      #center {
-        height: 100%;
-        overflow-y: auto;
-        flex-grow: 1;
-        flex-shrink: 1;
-      }
-
-      .tf-graph-dashboard #center {
-        background: white;
-      }
-
-      :host {
-        display: flex;
-        flex-direction: row;
-        height: 100%;
-      }
-    </style>
-  </template>
-  <script>
-    Polymer({
-      is: "tf-dashboard-layout",
-    });
-  </script>
-</dom-module>
-<dom-module id="dashboard-style" assetpath="../tf-dashboard-common/">
-  <template>
-    <style>
-      #content-container{
-        display: block;
-      }
-
-      .sidebar {
-        display: flex;
-        flex-direction: column;
-        height: 100%;
-        margin-right: 20px;
-      }
-
-      #categorizer {
-        flex-shrink: 0;
-      }
-
-      #xTypeSelector {
-        flex-shrink: 0;
-        margin: 20px 0;
-      }
-
-      #runSelector {
-        flex-shrink: 1;
-        flex-grow: 1;
-      }
-
-      #tooltip-sorting {
-        display: flex;
-        font-size: 14px;
-      }
-
-      #tooltip-sorting-label {
-        margin-top: 13px;
-        margin-left: 28px;
-      }
-
-      #tooltip-sorting paper-dropdown-menu {
-        margin-left: 10px;
-        --paper-input-container-focus-color: var(--tb-orange-strong);
-        width: 105px;
-      }
-
-      .sidebar-section {
-        border-top: solid 1px rgba(0, 0, 0, 0.12);
-        padding: 20px 0px 20px 30px;
-      }
-
-      .sidebar-section:first-child {
-        border: none;
-      }
-
-      .sidebar-section:last-child {
-        flex-grow: 1;
-        display: flex;
-      }
-
-      paper-checkbox {
-        --paper-checkbox-checked-color: var(--tb-ui-dark-accent);
-        --paper-checkbox-unchecked-color: var(--tb-ui-dark-accent);
-        font-size: 14px;
-      }
-    </style>
-  </template>
-</dom-module>
-
-<dom-module id="tf-downloader" assetpath="../tf-dashboard-common/">
-  <template>
-    <paper-dropdown-menu no-label-float="true" label="run to download" selected-item-label="{{_run}}">
-      <paper-menu class="dropdown-content">
-        <template is="dom-repeat" items="[[runs]]">
-          <paper-item no-label-float="true">[[item]]</paper-item>
-        </template>
-      </paper-menu>
-    </paper-dropdown-menu>
-    <div class="center">
-      <span>
-        <a download="[[_csvName(_run)]]" href="[[_csvUrl(_run, urlFn)]]">CSV</a>
-        <a download="[[_jsonName(_run)]]" href="[[_jsonUrl(_run, urlFn)]]">JSON</a>
-      </span>
-    </div>
-    <style>
-      :host {
-        display: flex;
-        height: 32px;
-      }
-      .center {
-        display: flex;
-        align-self: center;
-      }
-      paper-dropdown-menu {
-        width: 100px;
-        --paper-input-container-label: {
-          font-size: 10px;
-        }
-        --paper-input-container-input: {
-          font-size: 10px;
-        }
-      }
-      a {
-        font-size: 10px;
-        border-radius: 3px;
-        border: 1px solid #EEE;
-      }
-      paper-input {
-        font-size: 22px;
-      }
-    </style>
-  </template>
-  <script>
-    Polymer({
-      is: "tf-downloader",
-      properties: {
-        _run: String,
-        runs: Array,
-        tag: String,
-        urlFn: Function,
-      },
-      _csvUrl: function(_run, urlFn) {
-        return urlFn(this.tag, _run) + "&format=csv";
-      },
-      _jsonUrl: function(_run, urlFn) {
-        return urlFn(this.tag, _run);
-      },
-      _csvName: function(_run) {
-        return "run_" + _run + ",tag_" + this.tag + ".csv";
-      },
-      _jsonName: function(_run) {
-        return "run-" + _run + "-tag-" + this.tag + ".json";
-      },
-    });
-  </script>
-</dom-module>
-
-<dom-module id="tf-no-data-warning" assetpath="../tf-dashboard-common/">
-  <template>
-    <template is="dom-if" if="[[showWarning]]">
-      <div class="warning">
-        <template is="dom-if" if="[[graphMode]]">
-          <h3>
-            No graph definition files were found.
-          </h3>
-          <p>
-            To store a graph, create a
-            <code>tf.train.SummaryWriter</code>
-            and pass the graph either via the constructor, or by calling its
-            <code>add_graph()</code> method.
-            You may want to check out the
-            <a href="https://www.tensorflow.org/versions/master/how_tos/graph_viz/index.html">
-              graph visualizer tutorial
-            </a>
-            .
-          </p>
-        </template>
-        <template is="dom-if" if="[[!graphMode]]">
-          <h3>
-            No <span>[[dataType]]</span> data was found.
-          </h3>
-          <p>
-            Probable causes:
-            </p><ul>
-              <li>
-                You haven't written any <span>[[dataType]]</span> data
-                to your event files.
-              </li>
-              <li>
-                TensorBoard can't find your event files.
-              </li>
-            </ul>
-          <p></p>
-        </template>
-        <p>
-          If you're new to using TensorBoard, and want to find out how to add
-          data and set up your event files, check out the
-          <a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md">
-            README
-          </a>
-          and perhaps the
-          <a href="https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html">
-            TensorBoard tutorial
-          </a>.
-        </p>
-
-        <p>
-          If you think TensorBoard is configured properly, please see the
-          <a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tensorboard/README.md#my-tensorboard-isnt-showing-any-data-whats-wrong">
-            section of the README devoted to missing data problems
-          </a>
-          and consider filing an issue on GitHub.
-        </p>
-
-      </div>
-    </template>
-    <style>
-      .warning {
-        max-width: 540px;
-        margin: 80px auto 0 auto;
-      }
-    </style>
-  </template>
-
-  <script>
-    Polymer({
-      is: "tf-no-data-warning",
-      properties: {
-        dataType: String,
-        showWarning: Boolean,
-        graphMode: {type: Boolean, computed: "_isGraph(dataType)"},
-      },
-      _isGraph: function(dataType) {
-        return dataType === "graph";
-      },
-    });
-  </script>
-</dom-module>
-<script>/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-var TF;
-(function (TF) {
-    var Dashboard;
-    (function (Dashboard) {
-        /**
-         * ReloadBehavior: A simple behavior for dashboards where the
-         * frontendReload() function should find every child element with a
-         * given tag name (e.g. "tf-line-chart" or "tf-image-loader")
-         * and call a `reload` method on that child.
-         * May later extend it so it has more sophisticated logic, e.g. reloading
-         * only tags that are in view.
-         */
-        function ReloadBehavior(tagName) {
-            return {
-                properties: {
-                    reloadTag: {
-                        type: String,
-                        value: tagName,
-                    },
-                },
-                frontendReload: function () {
-                    var elements = this.getElementsByTagName(this.reloadTag);
-                    Array.prototype.forEach.call(elements, function (x) { x.reload(); });
-                },
-            };
-        }
-        Dashboard.ReloadBehavior = ReloadBehavior;
-    })(Dashboard = TF.Dashboard || (TF.Dashboard = {}));
-})(TF || (TF = {}));
-</script>
-
-<dom-module id="tf-option-selector" assetpath="../tf-dashboard-common/">
-  <template>
-    <div id="wrap">
-      <h3>[[name]]</h3>
-      <div class="content-wrapper"><content></content></div>
-    </div>
-    <style>
-      .content-wrapper ::content > * {
-        width: 30%;
-        font-size: 13px;
-        background: none;
-        margin-top: 10px;
-        color: var(--tb-ui-dark-accent);
-      }
-
-      .content-wrapper ::content :first-of-type {
-        margin-left: 0;
-      }
-
-      .content-wrapper ::content .selected {
-        background-color: var(--tb-ui-dark-accent);
-        color: white!important;
-      }
-
-      h3 {
-        color: var(--paper-grey-800);
-        margin: 0;
-        font-weight: normal;
-        font-size: 14px;
-        margin-bottom: 5px;
-        display: block;
-        pointer-events: none;
-      }
-    </style>
-  </template>
-  <script>
-    Polymer({
-      is: "tf-option-selector",
-      properties: {
-        name: String,
-        selectedId: {
-          type: String,
-          notify: true,
-          observer: '_selectedIdChanged'
-        }
-      },
-      attached: function() {
-        this.async(function() {
-          this.getEffectiveChildren().forEach(function(node) {
-            this.listen(node, 'tap', '_selectTarget');
-          }.bind(this));
-        });
-      },
-      _selectTarget: function(e) {
-        this.selectedId = e.currentTarget.id;
-      },
-      _selectedIdChanged: function() {
-        var selected = this.queryEffectiveChildren('#' + this.selectedId);
-        if (!selected) {
-          return;
-        }
-
-        this.getEffectiveChildren().forEach(function(node) {
-          node.classList.remove("selected");
-        });
-        selected.classList.add("selected");
-      }
-    });
-  </script>
-</dom-module>
 <dom-module id="run-color-style" assetpath="../tf-dashboard-common/">
   <template>
     <style>
@@ -2781,262 +3028,103 @@ var TF;
   </script>
 </dom-module>
 
-<dom-module id="tf-panes-helper" assetpath="../tf-dashboard-common/">
+<dom-module id="tf-sidebar-helper" assetpath="../tf-dashboard-common/">
   <template>
-    <content></content> 
-    <tf-no-data-warning data-type="[[dataType]]" show-warning="[[dataNotFound]]"></tf-no-data-warning>
-
-    <template is="dom-repeat" items="[[categories]]" as="category">
-      <tf-collapsable-pane name="[[category.name]]" count="[[_count(category.tags, selectedRuns.*)]]">
-        <div class="layout horizontal wrap">
-          <template is="dom-repeat" items="[[_categoryCards(category, selectedRuns.*, run2tag.*)]]">
-              <div class="card">
-                <span class="card-title">[[item.tag]]</span>
-                <template is="dom-if" if="[[repeatForRuns]]">
-                  <span class="card-subtitle">[[item.run]]</span>
-                </template>
-                <div class="card-content">
-                  <tf-chart-scaffold tag="[[item.tag]]" data-provider="[[dataProvider]]" visible-series="[[item.runs]]" on-ready="_instantiateTemplate">
-                    
-                  </tf-chart-scaffold>
-                </div>
-                <div class="card-bottom-row">
-                  <paper-icon-button class="expand-button" icon="fullscreen" on-tap="_toggleExpanded"></paper-icon-button>
-                  <template is="dom-if" if="[[showDownloadLinks]]">
-                    <tf-downloader runs="[[item.runs]]" tag="[[item.tag]]" url-fn="[[downloadLinkUrlFunction]]">
-                    </tf-downloader>
-                  </template>
-                </div>
-              </div>
-          </template>
-        </div>
-      </tf-collapsable-pane>
-    </template>
-
+    <div class="sidebar-section">
+      <tf-categorizer id="categorizer" tags="[[_visibleTags]]" categories="{{categories}}"></tf-categorizer>
+      <content select=".extend-first-section"></content>
+    </div>
+    <content></content>
+    <div class="sidebar-section">
+      <tf-run-selector id="runSelector" runs="[[runs]]" color-scale="[[colorScale]]" out-selected="{{selectedRuns}}"></tf-run-selector>
+    </div>
+    <style include="dashboard-style"></style>
     <style>
-      .card {
-        height: var(--card-height, 200px);
-        width: var(--card-width, 300px);
+      :host {
         display: flex;
         flex-direction: column;
-        margin: 5px;
-        padding: 0 30px 30px 0;
-        -webkit-user-select: none;
-        -moz-user-select: none;
-        position: relative;
+        height: 100%;
       }
 
-      .card-expanded {
-        height: var(--card-expanded-height, 400px);
-        width: var(--card-expanded-width, 100%);
-      }
-
-      .card-title, .card-subtitle {
-        flex-grow: 0;
+      #categorizer {
         flex-shrink: 0;
-        font-size: 14px;
-        text-overflow: ellipsis;
-        overflow: hidden;
       }
 
-      .card-subtitle {
-        font-size: 12px;
-      }
-
-      .card-content {
-        flex-grow: 1;
+      #runSelector {
         flex-shrink: 1;
+        flex-grow: 1;
+      }
+
+      .sidebar-section {
+        border-top: solid 1px rgba(0, 0, 0, 0.12);
+        padding: 20px 0px 20px 30px;
+      }
+
+      .sidebar-section:first-child {
+        border: none;
+      }
+
+      .sidebar-section:last-child {
+        flex-grow: 1;
         display: flex;
-        margin-top: 10px;
       }
 
-      .card-bottom-row {
-        position: absolute;
-        left: 0px;
-        bottom: 0px;
-        width: 100%;
-        display: flex;
-        flex-direction: row;
-        justify-content: space-between;
-        pointer-events: none;
+      paper-checkbox {
+        --paper-checkbox-checked-color: var(--tb-ui-dark-accent);
+        --paper-checkbox-unchecked-color: var(--tb-ui-dark-accent);
+        font-size: 14px;
       }
-
-      .expand-button {
-        color: #2196F3;
-        width: 32px;
-        height: 32px;
-        padding: 4px;
-        border-radius: 100%;
-        pointer-events: auto;
-      }
-
-      .card-expanded .expand-button {
-        background: var(--tb-ui-light-accent);
-      }
-
-      tf-downloader {
-        margin-right: 30px;
-        pointer-events: auto;
-      }
-
     </style>
   </template>
   <script>
     Polymer({
-      is: "tf-panes-helper",
+      is: "tf-sidebar-helper",
       properties: {
         /**
-         * Categories that separate the template instances. Each category will
-         * be given its own collapsible pane. The category must be an array of
-         * objects, each with a 'name' property and a 'tags' array of strings.
+         * This is an output of the categories that the user selected to
+         * separate the different tags. Each category here should be given its
+         * own collapsible pane.
          */
-        categories: Array,
-
-        /**
-         * The name of the data type that is used by this dashboard. This will
-         * be used to display what is missing when there is no data available.
-         */
-        dataType: String,
-
-        /**
-         * The function that requests and returns a promise with the data of the
-         * required type for the templates from the backend.
-         */
-        dataProvider: Object,
-
-        /**
-         * If false, instantiates one template for each tag and calls
-         * setVisibleSeries on the first element of the template with all valid
-         * runs the tag has. If true, instantiates one template for each run of
-         * each tag, and calls setVisibleSeries of the first element of the
-         * instantiated template with just the one run.
-         */
-        repeatForRuns: {
-          type: Boolean,
-          value: false
+        categories: {
+          type: Array,
+          notify: true,
         },
 
+        /**
+         * Input of the colors that are used for the user's runs.
+         */
+        colorScale: Object,
+
         /**
          * Map from runs to the valid tags that have them.
          */
         run2tag: Object,
 
         /**
-         * Array with the runs that are selected by the user (i.e. valid to be
-         * displayed).
+         * Input of all valid runs that can be selected by the user.
          */
-        selectedRuns: Array,
+        runs: Array,
 
         /**
-         * If true, shows a menu with download links for the template data.
-         * If this is set to true, urlFn must also be provided.
+         * Outputs an array with the runs that are selected by the user (i.e.
+         * valid to be displayed).
          */
-        showDownloadLinks: Boolean,
-
-        /**
-         * Function that returns the route to get data to download. Must be
-         * provided if showDownloadLinks is enabled.
-         */
-        downloadLinkUrlFunction: Function,
-        _contentTemplate: {
-          type: Object,
-          value: null
-        },
-        _stampedTemplates: {
+        selectedRuns: {
           type: Array,
-          value: function() { return [] }
-        }
-      },
-      behaviors: [
-        Polymer.Templatizer,
-      ],
+          notify: true,
+        },
 
-      /**
-       * Initializes the Polymer.Templatizer behavior with the template supplied
-       * by the user. With this, all calls to this.stamp() will produce an
-       * instance of the user template.
-       */
-      _initTemplatizer: function() {
-        if (!this._contentTemplate) {
-          // First template is used as the content.
-          this._contentTemplate = Polymer.dom(this).querySelector('template');
-          this.templatize(this._contentTemplate);
-        }
+        _visibleTags: {
+          type: Array,
+          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
+        },
       },
-
-      /**
-       * Called every time a tf-chart-scaffold is ready, stamps the user
-       * template inside the scaffold element (before it is attached) and
-       * stores the stamped template in an array to use for data binding
-       * (forwardParentProp/Path).
-       */
-      _instantiateTemplate: function(e) {
-        var scaffold = e.target;
-        this._initTemplatizer();
-        var instance = this.stamp();
-        this._stampedTemplates.push(instance);
-        Polymer.dom(scaffold).appendChild(instance.root);
+      _getVisibleTags: function() {
+        var keys = this.selectedRuns;
+        var dict = this.run2tag;
+        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
       },
-      _toggleExpanded: function(e) {
-        var currentTarget = Polymer.dom(e.currentTarget);
-        var card = currentTarget.node.closest('.card');
-        var scaffold = card.querySelector('tf-chart-scaffold');
-        card.classList.toggle('card-expanded');
-        scaffold.chart().redraw();
-      },
-      _count: function(tags) {
-        if (!this.repeatForRuns) {
-          return tags.length;
-        }
-
-        var targetTags = d3.set(tags);
-        var count = 0;
-        this.selectedRuns.forEach(function(r) {
-          this.run2tag[r].forEach(function(t) {
-            if (targetTags.has(t)) {
-              count++;
-            }
-          });
-        }.bind(this));
-        return count;
-      },
-      _categoryCards: function(category) {
-        var cards = [];
-        category.tags.forEach(function(tag) {
-          var runs = this.selectedRuns.filter(function(r) {
-            return this.run2tag[r] && this.run2tag[r].indexOf(tag) !== -1;
-          }.bind(this));
-
-          if (this.repeatForRuns) {
-            runs.forEach(function(run) {
-              cards.push({tag: tag, run: run, runs: [run]});
-            });
-          } else {
-            cards.push({tag: tag, runs: runs});
-          }
-        }.bind(this));
-
-        return cards;
-      },
-
-      /*
-       * Polymer data binding forwarding functions. Check the
-       * Polymer.Templatizer documentation for more information.
-       */
-
-      _forwardParentProp: function(property, value) {
-        this._stampedTemplates.forEach(function(instance) {
-          instance[property] = value;
-        });
-      },
-      _forwardParentPath: function(path, value) {
-        this._stampedTemplates.forEach(function(instance) {
-          instance.notifyPath(path, value, true);
-        });
-      },
-      // TODO(renatoutsch): implement the instance forwarding for two-way data
-      // binding.
-    });
+    })
   </script>
 </dom-module>
 
@@ -4144,43 +4232,41 @@ var VZ;
 <dom-module id="tf-event-dashboard" assetpath="../tf-event-dashboard/">
   <template>
     <div id="plumbing">
-      <tf-color-scale id="colorScale" runs="[[runs]]" out-color-scale="{{colorScale}}"></tf-color-scale>
+      <tf-color-scale id="colorScale" runs="[[runs]]" out-color-scale="{{_colorScale}}"></tf-color-scale>
     </div>
 
     <tf-dashboard-layout>
       <div class="sidebar">
-        <div class="sidebar-section">
-          <tf-categorizer id="categorizer" tags="[[_visibleTags]]" categories="{{categories}}"></tf-categorizer>
-          <paper-checkbox id="download-option" checked="{{_showDownloadLinks}}">Data download links</paper-checkbox>
-          <div id="tooltip-sorting">
-            <div id="tooltip-sorting-label">Tooltip sorting method:</div>
-            <paper-dropdown-menu no-label-float="" selected-item-label="{{_tooltipSortingMethod}}">
-              <paper-menu class="dropdown-content" selected="0">
-                <paper-item>default</paper-item>
-                <paper-item>descending</paper-item>
-                <paper-item>ascending</paper-item>
-              </paper-menu>
-            </paper-dropdown-menu>
+        <tf-sidebar-helper categories="{{_categories}}" color-scale="[[_colorScale]]" run2tag="[[run2tag]]" runs="[[runs]]" selected-runs="{{_selectedRuns}}">
+          <div class="extend-first-section">
+            <paper-checkbox id="download-option" checked="{{_showDownloadLinks}}">Data download links</paper-checkbox>
+            <div id="tooltip-sorting">
+              <div id="tooltip-sorting-label">Tooltip sorting method:</div>
+              <paper-dropdown-menu no-label-float="" selected-item-label="{{_tooltipSortingMethod}}">
+                <paper-menu class="dropdown-content" selected="0">
+                  <paper-item>default</paper-item>
+                  <paper-item>descending</paper-item>
+                  <paper-item>ascending</paper-item>
+                </paper-menu>
+              </paper-dropdown-menu>
+            </div>
           </div>
-        </div>
-        <div class="sidebar-section">
-          <tf-smoothing-input weight="{{_smoothingWeight}}" step="0.001" min="0" max="1"></tf-smoothing-input>
-        </div>
-        <div class="sidebar-section">
-          <tf-option-selector id="xTypeSelector" name="Horizontal Axis" selected-id="{{_xType}}">
-            <paper-button id="step">step</paper-button>
-            <paper-button id="relative">relative</paper-button>
-            <paper-button id="wall_time">wall</paper-button>
-          </tf-option-selector>
-        </div>
-        <div class="sidebar-section">
-          <tf-run-selector id="runSelector" runs="[[runs]]" color-scale="[[colorScale]]" out-selected="{{selectedRuns}}"></tf-run-selector>
-        </div>
+          <div class="sidebar-section">
+            <tf-smoothing-input weight="{{_smoothingWeight}}" step="0.001" min="0" max="1"></tf-smoothing-input>
+          </div>
+          <div class="sidebar-section">
+            <tf-option-selector id="xTypeSelector" name="Horizontal Axis" selected-id="{{_xType}}">
+              <paper-button id="step">step</paper-button>
+              <paper-button id="relative">relative</paper-button>
+              <paper-button id="wall_time">wall</paper-button>
+            </tf-option-selector>
+          </div>
+        </tf-sidebar-helper>
       </div>
       <div class="center">
-        <tf-panes-helper categories="[[categories]]" data-type="[[dataType]]" data-provider="[[dataProvider]]" run2tag="[[run2tag]]" selected-runs="[[selectedRuns]]" show-download-links="[[_showDownloadLinks]]" download-link-url-function="[[scalarUrl]]">
+        <tf-panes-helper categories="[[_categories]]" data-type="[[dataType]]" data-provider="[[dataProvider]]" run2tag="[[run2tag]]" selected-runs="[[_selectedRuns]]" show-download-links="[[_showDownloadLinks]]" download-link-url-function="[[scalarUrl]]">
           <template>
-            <vz-line-chart x-type="[[_xType]]" color-scale="[[colorScale]]" smoothing-enabled="[[_smoothingEnabled]]" smoothing-weight="[[_smoothingWeight]]" tooltip-sorting-method="[[_tooltipSortingMethod]]"></vz-line-chart>
+            <vz-line-chart x-type="[[_xType]]" color-scale="[[_colorScale]]" smoothing-enabled="[[_smoothingEnabled]]" smoothing-weight="[[_smoothingWeight]]" tooltip-sorting-method="[[_tooltipSortingMethod]]"></vz-line-chart>
             <paper-icon-button class="log-button" icon="line-weight" on-tap="toggleLogScale" title="Toggle y-axis log scale"></paper-icon-button>
           </template>
         </tf-panes-helper>
@@ -4204,6 +4290,23 @@ var VZ;
       .log-button-selected {
         background: var(--tb-ui-light-accent);
       }
+
+      #tooltip-sorting {
+        display: flex;
+        font-size: 14px;
+        margin-top: 5px;
+      }
+
+      #tooltip-sorting-label {
+        margin-top: 13px;
+        margin-left: 28px;
+      }
+
+      #tooltip-sorting paper-dropdown-menu {
+        margin-left: 10px;
+        --paper-input-container-focus-color: var(--tb-orange-strong);
+        width: 105px;
+      }
     </style>
 
   </template>
@@ -4225,10 +4328,6 @@ var VZ;
           type: Function,
           computed: "_getScalarUrl(router)"
         },
-        _visibleTags: {
-          type: Array,
-          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
-        },
         _showDownloadLinks: {
           type: Boolean,
           notify: true,
@@ -4246,10 +4345,6 @@ var VZ;
           type: Boolean,
           computed: '_computeSmoothingEnabled(_smoothingWeight)'
         },
-        colorScale: {
-          type: Object,
-          notify: true,
-        },
         _xType: {
           type: String,
           value: "step"
@@ -4263,11 +4358,6 @@ var VZ;
       _getScalarUrl: function() {
         return this.router.scalars;
       },
-      _getVisibleTags: function() {
-        var keys = this.selectedRuns;
-        var dict = this.run2tag;
-        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
-      },
       _showDownloadLinksObserver: TF.URIStorage.getBooleanObserver(
           '_showDownloadLinks', false),
       _smoothingWeightObserver: TF.URIStorage.getNumberObserver(
@@ -4629,14 +4719,12 @@ var VZ;
 <dom-module id="tf-distribution-dashboard" assetpath="../tf-distribution-dashboard/">
   <template>
     <div id="plumbing">
-      <tf-color-scale id="colorScale" runs="[[runs]]" out-color-scale="{{colorScale}}"></tf-color-scale>
+      <tf-color-scale id="colorScale" runs="[[runs]]" out-color-scale="{{_colorScale}}"></tf-color-scale>
     </div>
 
     <tf-dashboard-layout>
       <div class="sidebar">
-        <div class="sidebar-section">
-          <tf-categorizer id="categorizer" tags="[[_visibleTags]]" categories="{{categories}}"></tf-categorizer>
-        </div>
+        <tf-sidebar-helper categories="{{_categories}}" color-scale="[[_colorScale]]" run2tag="[[run2tag]]" runs="[[runs]]" selected-runs="{{_selectedRuns}}">
         <div class="sidebar-section">
           <tf-option-selector id="xTypeSelector" name="Horizontal Axis" selected-id="{{_xType}}">
             <paper-button id="step">step</paper-button>
@@ -4644,15 +4732,13 @@ var VZ;
             <paper-button id="wall_time">wall</paper-button>
           </tf-option-selector>
         </div>
-        <div class="sidebar-section">
-          <tf-run-selector id="runSelector" runs="[[runs]]" color-scale="[[colorScale]]" out-selected="{{selectedRuns}}"></tf-run-selector>
-          </div>
+        </tf-sidebar-helper>
       </div>
 
       <div class="center">
-        <tf-panes-helper categories="[[categories]]" data-type="[[dataType]]" data-provider="[[dataProvider]]" run2tag="[[run2tag]]" selected-runs="[[selectedRuns]]" repeat-for-runs="">
+        <tf-panes-helper categories="[[_categories]]" data-type="[[dataType]]" data-provider="[[dataProvider]]" run2tag="[[run2tag]]" selected-runs="[[_selectedRuns]]" repeat-for-runs="">
           <template>
-            <tf-distribution-chart x-type="[[_xType]]" color-scale="[[colorScale]]"></tf-distribution-chart>
+            <tf-distribution-chart x-type="[[_xType]]" color-scale="[[_colorScale]]"></tf-distribution-chart>
           </template>
         </tf-panes-helper>
       </div>
@@ -4669,21 +4755,12 @@ var VZ;
         TF.Backend.Behavior,
       ],
       properties: {
-        _visibleTags: {
-          type: Array,
-          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
-        },
         _xType: {
           type: String,
           value: "step"
         },
         dataType: {value: "compressedHistogram"},
       },
-      _getVisibleTags: function() {
-        var keys = this.selectedRuns;
-        var dict = this.run2tag;
-        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
-      },
     });
   </script>
 </dom-module>
@@ -5371,34 +5448,30 @@ var VZ;
 <dom-module id="tf-histogram-dashboard" assetpath="../tf-histogram-dashboard/">
   <template>
     <div id="plumbing">
-      <tf-color-scale id="colorScale" runs="[[runs]]" out-color-scale="{{colorScale}}"></tf-color-scale>
+      <tf-color-scale id="colorScale" runs="[[runs]]" out-color-scale="{{_colorScale}}"></tf-color-scale>
     </div>
 
     <tf-dashboard-layout>
       <div class="sidebar">
-        <div class="sidebar-section">
-          <tf-categorizer id="categorizer" tags="[[_visibleTags]]" categories="{{categories}}"></tf-categorizer>
-        </div>
-        <div class="sidebar-section">
-          <tf-option-selector id="histogramModeSelector" name="Histogram Mode" selected-id="{{_histogramMode}}">
-            <paper-button id="overlay">overlay</paper-button>
-            <paper-button id="offset">offset</paper-button>
-          </tf-option-selector>
-        </div>
-        <div class="sidebar-section">
-          <tf-option-selector id="timePropertySelector" name="Offset Time Axis" selected-id="{{_timeProperty}}">
-            <paper-button id="step">step</paper-button>
-            <paper-button id="relative">relative</paper-button>
-            <paper-button id="wall_time">wall</paper-button>
-          </tf-option-selector>
-        </div>
-        <div class="sidebar-section">
-          <tf-run-selector id="runSelector" runs="[[runs]]" color-scale="[[colorScale]]" out-selected="{{selectedRuns}}"></tf-run-selector>
+        <tf-sidebar-helper categories="{{_categories}}" color-scale="[[_colorScale]]" run2tag="[[run2tag]]" runs="[[runs]]" selected-runs="{{_selectedRuns}}" show-download-links="{{_showDownloadLinks}}">
+          <div class="sidebar-section">
+            <tf-option-selector id="histogramModeSelector" name="Histogram Mode" selected-id="{{_histogramMode}}">
+              <paper-button id="overlay">overlay</paper-button>
+              <paper-button id="offset">offset</paper-button>
+            </tf-option-selector>
           </div>
-      </div>
+          <div class="sidebar-section">
+            <tf-option-selector id="timePropertySelector" name="Offset Time Axis" selected-id="{{_timeProperty}}">
+              <paper-button id="step">step</paper-button>
+              <paper-button id="relative">relative</paper-button>
+              <paper-button id="wall_time">wall</paper-button>
+            </tf-option-selector>
+          
+       </div>
+      </tf-sidebar-helper></div>
 
       <div class="center">
-        <tf-panes-helper categories="[[categories]]" data-type="[[dataType]]" data-provider="[[dataProvider]]" run2tag="[[run2tag]]" selected-runs="[[selectedRuns]]" repeat-for-runs="">
+        <tf-panes-helper categories="[[_categories]]" data-type="[[dataType]]" data-provider="[[dataProvider]]" run2tag="[[run2tag]]" selected-runs="[[_selectedRuns]]" repeat-for-runs="">
           <template>
             <vz-histogram-timeseries time-property="[[_timeProperty]]" mode="[[_histogramMode]]" color-scale="[[_colorScaleFunction]]"></vz-histogram-timeseries>
           </template>
@@ -5423,6 +5496,10 @@ var VZ;
         TF.Backend.Behavior,
       ],
       properties: {
+        dataType: {
+          type: String,
+          value: "histogram"
+        },
         _histogramMode: {
           type: String,
           value: "offset"
@@ -5431,60 +5508,18 @@ var VZ;
           type: String,
           value: "step"
         },
-        _visibleTags: {
-          type: Array,
-          computed: "_getVisibleTags(selectedRuns.*, run2tag.*)"
-        },
         _colorScaleFunction: {
           type: Function,
-          computed: "_getColorScaleFunction(colorScale)"
+          computed: "_getColorScaleFunction(_colorScale)"
         },
-        colorScale: Object,
-        dataType: {
-          type: String,
-          value: "histogram"
-        }
       },
       attached: function() {
         this.async(function() {
           this.fire("rendered");
         });
       },
-      _array: function(x) {
-        return [x];
-      },
-      _count: function(tags) {
-        var targetTags = {};
-        tags.forEach(function(t) {
-          targetTags[t] = true;
-        });
-        var count = 0;
-        var _this = this;
-        this.selectedRuns.forEach(function(r) {
-          _this.run2tag[r].forEach(function(t) {
-            if (targetTags[t]) {
-              count++;
-            }
-          });
-        });
-        return count;
-      },
-      _getVisibleTags: function() {
-        var keys = this.selectedRuns;
-        var dict = this.run2tag;
-        return _.union.apply(null, keys.map(function(k) {return dict[k]}));
-      },
       _getColorScaleFunction: function() {
-        return this.colorScale.scale.bind(this.colorScale);
-      },
-      toggleSelected: function(e) {
-        var currentTarget = Polymer.dom(e.currentTarget);
-        var parentDiv = currentTarget.parentNode.parentNode;
-        parentDiv.classList.toggle("selected");
-        var chartScaffold = currentTarget.previousElementSibling;
-        if (chartScaffold) {
-          chartScaffold.chart().redraw();
-        }
+        return this._colorScale.scale.bind(this._colorScale);
       },
     });
   </script>
@@ -6049,6 +6084,11 @@ Polymer({
   }
 });
 </script>
+<script src="../lodash/lodash.min.js"></script>
+<script src="../graphlib/dist/graphlib.core.js"></script>
+<script src="../dagre/dist/dagre.core.js"></script>
+<script src="../lodash/lodash.min.js"></script>
+<script src="../graphlib/dist/graphlib.core.js"></script>
 <script>/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the 'License');
@@ -13740,6 +13780,7 @@ Polymer({
 });
 </script>
 </dom-module>
+<link rel="import" href="../iron-flex-layout/iron-flex-layout.html">
 <dom-module id="tf-graph" assetpath="../tf-graph/">
 <template>
 <style>
@@ -14368,6 +14409,8 @@ Polymer({
     })();
   </script>
 </dom-module>
+<link rel="import" href="../iron-list/iron-list.html">
+<link rel="import" href="../paper-item/all-imports.html">
 <dom-module id="tf-node-info" assetpath="../tf-graph-info/">
   <style>
   .sub-list-group {
@@ -15009,6 +15052,7 @@ h2 {
 })();
 </script>
 </dom-module>
+<link rel="import" href="../paper-progress/paper-progress.html">
 
 
 <dom-module id="tf-graph-board" assetpath="../tf-graph-board/">
@@ -15183,6 +15227,8 @@ Polymer({
   }
 });
 </script>
+<link rel="import" href="../paper-radio-group/paper-radio-group.html">
+<link rel="import" href="../paper-tooltip/paper-tooltip.html">
 <dom-module id="tf-graph-controls" assetpath="../tf-graph/">
 <template>
 <style>
diff --git a/tensorflow/tensorboard/gulp_tasks/compile.js b/tensorflow/tensorboard/gulp_tasks/compile.js
index 93d3e50c3f9..501eb6eaeed 100644
--- a/tensorflow/tensorboard/gulp_tasks/compile.js
+++ b/tensorflow/tensorboard/gulp_tasks/compile.js
@@ -49,8 +49,8 @@ module.exports = function() {
     // Collect all the typescript files across the components.
     entries = entries.concat(glob(
         'components/' + component.name + '/**/*.ts',
+        // Do not include tests or IDE-only typing files (deps.d.ts).
-        {ignore: 'components/' + component.name + '/**/*_test.ts'}));
+        // Do not include tests or IDE-purposed files.
+        {ignore: ['**/*_test.ts', '**/deps.d.ts']}));
     // Collect the unique external deps across all components using es6 modules.
     component.deps.forEach(function(dep) { deps['components/' + dep] = true; });
   });
diff --git a/tensorflow/tensorboard/gulp_tasks/tslint.js b/tensorflow/tensorboard/gulp_tasks/tslint.js
index 2c70202b3e7..726001fc906 100644
--- a/tensorflow/tensorboard/gulp_tasks/tslint.js
+++ b/tensorflow/tensorboard/gulp_tasks/tslint.js
@@ -18,7 +18,11 @@ var tslint = require('gulp-tslint');
 
 module.exports = function(strict) {
   return function() {
-    return gulp.src(['components/tf-*/**/*.ts', 'components/vz-*/**/*.ts'])
+    return gulp.src([
+      'components/tf-*/**/*.ts',
+      'components/vz-*/**/*.ts',
+      '!./components/**/deps.d.ts'
+    ])
         .pipe(tslint())
         .pipe(tslint.report('verbose', {
           emitError: strict,
diff --git a/tensorflow/tensorboard/gulp_tasks/vulcanize.js b/tensorflow/tensorboard/gulp_tasks/vulcanize.js
index 052dcedd2aa..b31392c2c39 100644
--- a/tensorflow/tensorboard/gulp_tasks/vulcanize.js
+++ b/tensorflow/tensorboard/gulp_tasks/vulcanize.js
@@ -54,9 +54,6 @@ function getNonTensorBoardComponents() {
       .map(function(dir) { return '/' + dir + '/'; });
 }
 
-var linkRegex = /<link rel="[^"]*" (type="[^"]*" )?href="[^"]*">\n/g;
-var scriptRegex = /<script src="[^"]*"><\/script>\n/g;
-
 module.exports = function(overwrite) {
   return function() {
     var suffix = overwrite ? '' : '.OPENSOURCE';
@@ -68,10 +65,6 @@ module.exports = function(overwrite) {
           stripComments: true,
           excludes: getNonTensorBoardComponents(),
         }))
-        // TODO(danmane): Remove this worrisome brittleness when vulcanize
-        // fixes https://github.com/Polymer/vulcanize/issues/273
-        .pipe(replace(linkRegex, ''))
-        .pipe(replace(scriptRegex, ''))
         .pipe(header(HEADER_STR))
         .pipe(rename('tf-tensorboard.html' + suffix))
         .pipe(gulp.dest('./dist'));
diff --git a/tensorflow/tensorboard/gulpfile.js b/tensorflow/tensorboard/gulpfile.js
index e09dc0aad19..4b921528189 100644
--- a/tensorflow/tensorboard/gulpfile.js
+++ b/tensorflow/tensorboard/gulpfile.js
@@ -75,6 +75,6 @@ gulp.task('default', ['watch', 'server']);
 // Clean all compiled JS files.
 var cleanCompiledTypeScript = require('gulp-clean-compiled-typescript');
 gulp.task('clean', function () {
-  return gulp.src('./components/**/*.ts')
+  return gulp.src(['./components/**/*.ts', '!./components/**/deps.d.ts'])
       .pipe(cleanCompiledTypeScript());
 });
diff --git a/tensorflow/tensorboard/tsconfig.json b/tensorflow/tensorboard/tsconfig.json
index e51e70f848f..ac69c30533f 100644
--- a/tensorflow/tensorboard/tsconfig.json
+++ b/tensorflow/tensorboard/tsconfig.json
@@ -10,6 +10,7 @@
     "node_modules",
     "typings/main.d.ts",
     "typings/main",
-    "lib"
+    "lib",
+    "components/**/deps.d.ts"
   ]
 }
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 27d9c14ec97..3d367d8ac64 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -90,6 +90,12 @@ def if_android_arm(a):
       "//conditions:default": [],
   })
 
+def if_android_arm64(a):
+  return select({
+      "//tensorflow:android_arm64": a,
+      "//conditions:default": [],
+  })
+
 def if_not_android(a):
   return select({
       "//tensorflow:android": [],
diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
index 77484a4d528..a5b0f03a25a 100644
--- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
+++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib.cc
@@ -294,7 +294,7 @@ void Generator::AppendFieldValueAppend(const FieldDescriptor& field,
 }
 
 void Generator::AppendFieldAppend(const FieldDescriptor& field) {
-  const string name = field.name();
+  const string& name = field.name();
 
   if (field.is_map()) {
     Print("{").Nest();
@@ -445,7 +445,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
   Unnest().Print("}");
   for (int i = 0; i < md.field_count(); ++i) {
     const FieldDescriptor* field = md.field(i);
-    const string field_name = field->name();
+    const string& field_name = field->name();
     string mutable_value_expr;
     string set_value_prefix;
     if (map_append) {
@@ -530,7 +530,7 @@ void Generator::AppendParseMessageFunction(const Descriptor& md) {
 
       for (int enum_i = 0; enum_i < enum_d->value_count(); ++enum_i) {
         const auto* value_d = enum_d->value(enum_i);
-        const string value_name = value_d->name();
+        const string& value_name = value_d->name();
         string condition = StrCat("value == \"", value_name,
                                   "\" || value == \"", value_d->number(), "\"");
         if (value_d->number() == 0) {
diff --git a/tensorflow/user_ops/BUILD b/tensorflow/user_ops/BUILD
index dd5246ba19d..6889ab8e56a 100644
--- a/tensorflow/user_ops/BUILD
+++ b/tensorflow/user_ops/BUILD
@@ -29,6 +29,7 @@ py_tests(
     name = "ackermann_test",
     size = "small",
     srcs = ["ackermann_test.py"],
+    additional_deps = ["//tensorflow:tensorflow_py"],
     data = [":ackermann_op.so"],
 )
 
@@ -41,6 +42,7 @@ py_tests(
     name = "duplicate_op_test",
     size = "small",
     srcs = ["duplicate_op_test.py"],
+    additional_deps = ["//tensorflow:tensorflow_py"],
     data = [":duplicate_op.so"],
 )
 
@@ -53,6 +55,7 @@ py_tests(
     name = "invalid_op_test",
     size = "small",
     srcs = ["invalid_op_test.py"],
+    additional_deps = ["//tensorflow:tensorflow_py"],
     data = [":invalid_op.so"],
 )
 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 8a664615a3c..01655f5d797 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -2,23 +2,25 @@
 
 load("//third_party/gpus:cuda_configure.bzl", "cuda_configure")
 
-# If TensorFlow is linked as a submodule, path_prefix is TensorFlow's directory
-# within the workspace (e.g. "tensorflow/"), and tf_repo_name is the name of the
-# local_repository rule (e.g. "@tf").
+# path_prefix and tf_repo_name were needed when TensorFlow was linked as a
+# submodule; both are still accepted but are no longer used.
 def tf_workspace(path_prefix = "", tf_repo_name = ""):
   cuda_configure(name = "local_config_cuda")
-
+  if path_prefix:
+    print("path_prefix was specified to tf_workspace but is no longer used and will be removed in the future.")
+  if tf_repo_name:
+    print("tf_repo_name was specified to tf_workspace but is no longer used and will be removed in the future.")
   # These lines need to be changed when updating Eigen. They are parsed from
   # this file by the cmake and make builds to determine the eigen version and hash.
-  eigen_version = "9e1b48c333aa"
-  eigen_sha256 = "ad2c990401a0b5529324e000737569f5f60d827f38586d5e02490252b3325c11"
+  eigen_version = "a237164a1f91"
+  eigen_sha256 = "db645b02ce5777a539797b52a18453ca557bbe456f5f28a6416897c4aadcf578"
 
   native.new_http_archive(
     name = "eigen_archive",
     url = "https://bitbucket.org/eigen/eigen/get/" + eigen_version + ".tar.gz",
     sha256 = eigen_sha256,
     strip_prefix = "eigen-eigen-" + eigen_version,
-    build_file = path_prefix + "eigen.BUILD",
+    build_file = str(Label("//:eigen.BUILD")),
   )
 
   native.git_repository(
@@ -37,7 +39,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "farmhash_archive",
     url = "https://github.com/google/farmhash/archive/34c13ddfab0e35422f4c3979f360635a8c050260.zip",
     sha256 = "e3d37a59101f38fd58fb799ed404d630f0eee18bfc2a2433910977cc8fea9c28",
-    build_file = path_prefix + "farmhash.BUILD",
+    build_file = str(Label("//:farmhash.BUILD")),
   )
 
   native.bind(
@@ -56,28 +58,28 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "jpeg_archive",
     url = "http://www.ijg.org/files/jpegsrc.v9a.tar.gz",
     sha256 = "3a753ea48d917945dd54a2d97de388aa06ca2eb1066cbfdc6652036349fe05a7",
-    build_file = path_prefix + "jpeg.BUILD",
+    build_file = str(Label("//:jpeg.BUILD")),
   )
 
   native.new_http_archive(
     name = "png_archive",
     url = "https://github.com/glennrp/libpng/archive/v1.2.53.zip",
     sha256 = "c35bcc6387495ee6e757507a68ba036d38ad05b415c2553b3debe2a57647a692",
-    build_file = path_prefix + "png.BUILD",
+    build_file = str(Label("//:png.BUILD")),
   )
 
   native.new_http_archive(
     name = "gif_archive",
     url = "http://ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz",
     sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1",
-    build_file = path_prefix + "gif.BUILD",
+    build_file = str(Label("//:gif.BUILD")),
   )
 
   native.new_http_archive(
     name = "six_archive",
     url = "https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz#md5=34eed507548117b2ab523ab14b2f8b55",
     sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
-    build_file = path_prefix + "six.BUILD",
+    build_file = str(Label("//:six.BUILD")),
   )
 
   native.bind(
@@ -95,7 +97,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "gmock_archive",
     url = "http://pkgs.fedoraproject.org/repo/pkgs/gmock/gmock-1.7.0.zip/073b984d8798ea1594f5e44d85b20d66/gmock-1.7.0.zip",
     sha256 = "26fcbb5925b74ad5fc8c26b0495dfc96353f4d553492eb97e85a8a6d2f43095b",
-    build_file = path_prefix + "gmock.BUILD",
+    build_file = str(Label("//:gmock.BUILD")),
   )
 
   native.bind(
@@ -110,7 +112,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
   native.bind(
     name = "python_headers",
-    actual = tf_repo_name + "//util/python:python_headers",
+    actual = str(Label("//util/python:python_headers")),
   )
 
   # grpc expects //external:protobuf_clib and //external:protobuf_compiler
@@ -130,7 +132,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     commit = "d7ff4ff40071d2b486a052183e3e9f9382afb745",
     init_submodules = True,
     remote = "https://github.com/grpc/grpc.git",
-    build_file = path_prefix + "grpc.BUILD",
+    build_file = str(Label("//:grpc.BUILD")),
   )
 
   # protobuf expects //external:grpc_cpp_plugin to point to grpc's
@@ -149,7 +151,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "jsoncpp_git",
     remote = "https://github.com/open-source-parsers/jsoncpp.git",
     commit = "11086dd6a7eba04289944367ca82cea71299ed70",
-    build_file = path_prefix + "jsoncpp.BUILD",
+    build_file = str(Label("//:jsoncpp.BUILD")),
   )
 
   native.bind(
@@ -167,7 +169,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "nanopb_git",
     commit = "1251fa1",
     remote = "https://github.com/nanopb/nanopb.git",
-    build_file = path_prefix + "nanopb.BUILD",
+    build_file = str(Label("//:nanopb.BUILD")),
   )
 
   native.bind(
@@ -179,26 +181,26 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     name = "avro_archive",
     url = "http://www-us.apache.org/dist/avro/avro-1.8.0/cpp/avro-cpp-1.8.0.tar.gz",
     sha256 = "ec6e2ec957e95ca07f70cc25f02f5c416f47cb27bd987a6ec770dcbe72527368",
-    build_file = path_prefix + "avro.BUILD",
+    build_file = str(Label("//:avro.BUILD")),
   )
 
   native.new_http_archive(
     name = "boost_archive",
     url = "http://pilotfiber.dl.sourceforge.net/project/boost/boost/1.61.0/boost_1_61_0.tar.gz",
     sha256 = "a77c7cc660ec02704c6884fbb20c552d52d60a18f26573c9cee0788bf00ed7e6",
-    build_file = path_prefix + "boost.BUILD",
+    build_file = str(Label("//:boost.BUILD")),
   )
 
   native.new_http_archive(
     name = "bzip2_archive",
     url = "http://www.bzip.org/1.0.6/bzip2-1.0.6.tar.gz",
     sha256 = "a2848f34fcd5d6cf47def00461fcb528a0484d8edef8208d6d2e2909dc61d9cd",
-    build_file = path_prefix + "bzip2.BUILD",
+    build_file = str(Label("//:bzip2.BUILD")),
   )
 
   native.new_http_archive(
     name = "zlib_archive",
     url = "http://zlib.net/zlib-1.2.8.tar.gz",
     sha256 = "36658cb768a54c1d4dec43c3116c27ed893e88b02ecfcb44f2166f9c0b7f2a0d",
-    build_file = path_prefix + "zlib.BUILD",
+    build_file = str(Label("//:zlib.BUILD")),
   )