Merge changes from GitHub.

PiperOrigin-RevId: 194997009
Patrick Nguyen 2018-05-01 14:28:36 -07:00 committed by TensorFlower Gardener
parent 46bf1e8934
commit 325d0ef21a
121 changed files with 1809 additions and 724 deletions

.gitignore vendored
View File

@@ -27,6 +27,7 @@ Podfile.lock
/tensorflow/contrib/lite/examples/ios/simple/data/*.txt
/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
xcuserdata/**
/api_init_files_list.txt
# Android
.gradle

View File

@@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
TestGradientsError(false);
}
// REGISTER_OP for CApiAttributesTest test cases.
// Registers two ops, each with a single attribute called 'v'.
// The attribute in one op will have a type 'type', the other
// will have list(type).

View File

@@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad);
Status StridedSliceGradHelper(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
Input x = Shape(scope, op.input(0));
Input begin = op.input(1);
Input end = op.input(2);
Input strides = op.input(3);
int64 begin_mask;
int64 end_mask;
int64 ellipsis_mask;
int64 new_axis_mask;
int64 shrink_axis_mask;
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask));
TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask));
TF_RETURN_IF_ERROR(
GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask));
grad_outputs->push_back(
StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0],
StridedSliceGrad::BeginMask(begin_mask)
.EndMask(end_mask)
.EllipsisMask(ellipsis_mask)
.NewAxisMask(new_axis_mask)
.ShrinkAxisMask(shrink_axis_mask)));
// No gradients returned for begin, end and strides
grad_outputs->push_back(NoGradient());
grad_outputs->push_back(NoGradient());
grad_outputs->push_back(NoGradient());
return scope.status();
}
REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper);
} // anonymous namespace
} // namespace ops
} // namespace tensorflow

View File

@@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) {
RunTest(x, x_shape, y, y_shape);
}
TEST_F(ArrayGradTest, StridedSliceGrad) {
TensorShape x_shape({6, 4, 4});
auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
// y = x[2:6:2, 1:3, 1:3]
auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1});
// y.shape = [2, 2, 2];
RunTest(x, x_shape, y, {2, 2, 2});
// y = x[2:6:2, :3, 1:]
// begin_mask = 1<<1 (ignore the begin index for dimension 1)
// end_mask = 1<<2 (ignore the end index for dimension 2)
y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1},
StridedSlice::BeginMask(1 << 1).EndMask(1 << 2));
// y.shape = [2, 3, 3];
RunTest(x, x_shape, y, {2, 3, 3});
// y = x[tf.newaxis, 2:6:2, 1:3, 1:3]
y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1},
StridedSlice::NewAxisMask(1 << 0));
// y.shape = [1, 2, 2, 2];
RunTest(x, x_shape, y, {1, 2, 2, 2});
}
} // namespace
} // namespace tensorflow
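For readers tracing the mask arithmetic used in the tests above, a small NumPy sketch of strided-slice mask semantics (illustrative only, not the C++ gradient API):

```python
import numpy as np

x = np.arange(6 * 4 * 4, dtype=np.float32).reshape(6, 4, 4)

# Plain strided slice: x[2:6:2, 1:3, 1:3] -> shape (2, 2, 2).
assert x[2:6:2, 1:3, 1:3].shape == (2, 2, 2)

# begin_mask = 1 << 1 drops the begin index of dimension 1 (1:3 -> :3);
# end_mask = 1 << 2 drops the end index of dimension 2 (1:3 -> 1:).
assert x[2:6:2, :3, 1:].shape == (2, 3, 3)

# new_axis_mask = 1 << 0 inserts a length-1 axis in front.
assert x[np.newaxis, 2:6:2, 1:3, 1:3].shape == (1, 2, 2, 2)
```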

View File

@@ -56,8 +56,6 @@ Use AutoGraph in one of the following ways, described below:
1. Annotations (simpler)
2. Functional API (more flexible)
To get started, install the latest nightly TensorFlow build:
```shell
@@ -70,6 +68,13 @@ Then import the `autograph` module from `tf.contrib`:
from tensorflow.contrib import autograph as ag
```
### Interactive demo notebooks
For more extensive examples, check out these interactive notebooks:
* [RNN trained using Keras and Estimators](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/rnn_keras_estimator.ipynb)
* [Demo from the TF Dev Summit 2018](https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/autograph/examples/notebooks/dev_summit_2018_demo.ipynb)
## Using with annotations
Annotating a function or class with `@convert` converts it in place:
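A minimal sketch of the annotation style, assuming the contrib-era `ag.convert()` decorator (illustrative, not from this diff):

```python
from tensorflow.contrib import autograph as ag

# Hypothetical example: the decorator rewrites the Python control flow
# below (while/if) into TensorFlow graph ops at call time.
@ag.convert()
def collatz_steps(n):
  steps = 0
  while n > 1:
    if n % 2 == 0:
      n = n // 2
    else:
      n = 3 * n + 1
    steps += 1
  return steps
```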

View File

@@ -84,7 +84,7 @@ if (NOT WIN32)
option(systemlib_ALL "Turn on every possible systemlib_* options" OFF)
if (systemlib_ALL)
set (systemlib_ZLIB ON)
endif (systemlib_ALL)
endif()
@@ -471,6 +471,10 @@ if (tensorflow_ENABLE_GPU)
include_directories(${tensorflow_source_dir}/third_party/gpus)
# add cuda libraries to tensorflow_EXTERNAL_LIBRARIES
list(APPEND tensorflow_EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
if(NOT WIN32)
# add gomp to tensorflow_EXTERNAL_LIBRARIES, needed by libcusolver.so
list(APPEND tensorflow_EXTERNAL_LIBRARIES gomp)
endif()
# NOTE(mrry): Update these flags when the version of CUDA or cuDNN used
# in the default build is upgraded.

View File

@@ -177,6 +177,16 @@ if(WIN32)
"${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_windows_exclude_srcs})
else(WIN32)
if(tensorflow_ENABLE_GPU)
file(GLOB_RECURSE tf_core_kernels_gpu_exclude_srcs
# temporarily disable nccl as it needs to be ported with gpu
"${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc"
"${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_ops.cc"
"${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc"
)
list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_gpu_exclude_srcs})
endif(tensorflow_ENABLE_GPU)
endif(WIN32)
file(GLOB_RECURSE tf_core_gpu_kernels_srcs

View File

@@ -64,6 +64,8 @@ file(GLOB tf_stream_executor_srcs
if (tensorflow_ENABLE_GPU)
file(GLOB tf_stream_executor_gpu_srcs
"${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc"
"${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h"
"${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc"
)
if (NOT tensorflow_BUILD_CC_TESTS)
file(GLOB tf_stream_executor_gpu_tests

View File

@@ -152,6 +152,22 @@ class CrfTest(test.TestCase):
self.assertAllClose(tf_log_norm, tf_brute_force_log_norm)
def testCrfLogNormZeroSeqLength(self):
"""
Test `crf_log_norm` when `sequence_lengths` contains one or more zeros.
"""
with self.test_session() as sess:
inputs = constant_op.constant(np.ones([2, 10, 5],
dtype=np.float32))
transition_params = constant_op.constant(np.ones([5, 5],
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
expected_log_norm = np.zeros([2], dtype=np.float32)
log_norm = crf.crf_log_norm(inputs, sequence_lengths, transition_params)
tf_log_norm = sess.run(log_norm)
self.assertAllClose(tf_log_norm, expected_log_norm)
def testCrfLogLikelihood(self):
inputs = np.array(
[[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
@@ -292,10 +308,10 @@ class CrfTest(test.TestCase):
dtype=np.float32))
sequence_lengths = constant_op.constant(np.zeros([2],
dtype=np.int32))
tags, scores = crf.crf_decode(inputs, transition_params, sequence_lengths)
tf_tags, tf_scores = sess.run([tags, scores])
self.assertEqual(len(tf_tags.shape), 2)
self.assertEqual(len(tf_scores.shape), 1)
if __name__ == "__main__":
test.main()

View File

@@ -90,9 +90,13 @@ def crf_sequence_score(inputs, tag_indices, sequence_lengths,
batch_size = array_ops.shape(inputs, out_type=tag_indices.dtype)[0]
example_inds = array_ops.reshape(
math_ops.range(batch_size, dtype=tag_indices.dtype), [-1, 1])
sequence_scores = array_ops.gather_nd(
array_ops.squeeze(inputs, [1]),
array_ops.concat([example_inds, tag_indices], axis=1))
sequence_scores = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(sequence_scores),
sequence_scores)
return sequence_scores
def _multi_seq_fn():
# Compute the scores of the given tag sequence.
@@ -128,7 +132,12 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp over
# the "initial state" (the unary potentials).
def _single_seq_fn():
log_norm = math_ops.reduce_logsumexp(first_input, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
def _multi_seq_fn():
"""Forward computation of alpha values."""
@@ -137,13 +146,19 @@ def crf_log_norm(inputs, sequence_lengths, transition_params):
# Compute the alpha values in the forward algorithm in order to get the
# partition function.
forward_cell = CrfForwardRnnCell(transition_params)
# Sequence length is not allowed to be less than zero.
sequence_lengths_less_one = math_ops.maximum(0, sequence_lengths - 1)
_, alphas = rnn.dynamic_rnn(
cell=forward_cell,
inputs=rest_of_input,
sequence_length=sequence_lengths_less_one,
initial_state=first_input,
dtype=dtypes.float32)
log_norm = math_ops.reduce_logsumexp(alphas, [1])
# Mask `log_norm` of the sequences with length <= zero.
log_norm = array_ops.where(math_ops.less_equal(sequence_lengths, 0),
array_ops.zeros_like(log_norm),
log_norm)
return log_norm
max_seq_len = array_ops.shape(inputs)[1]
@@ -479,7 +494,7 @@ def crf_decode(potentials, transition_params, sequence_length):
initial_state = array_ops.slice(potentials, [0, 0, 0], [-1, 1, -1])
initial_state = array_ops.squeeze(initial_state, axis=[1])  # [B, O]
inputs = array_ops.slice(potentials, [0, 1, 0], [-1, -1, -1])  # [B, T-1, O]
# Sequence length is not allowed to be less than zero.
sequence_length_less_one = math_ops.maximum(0, sequence_length - 1)
backpointers, last_score = rnn.dynamic_rnn(  # [B, T - 1, O], [B, O]
crf_fwd_cell,
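The zero-length handling added above reduces to clamping the RNN length at zero and masking the final log norm; a NumPy sketch of the masking step (illustrative only):

```python
import numpy as np

def mask_log_norm(log_norm, sequence_lengths):
  # A sequence of length <= 0 has an empty score sum, so its partition
  # function is log(1) = 0; this mirrors the array_ops.where masking above.
  return np.where(sequence_lengths <= 0, np.zeros_like(log_norm), log_norm)

print(mask_log_norm(np.array([3.2, 1.7]), np.array([0, 5])))  # [0.  1.7]
```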

View File

@@ -0,0 +1,109 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Bijector."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import Ordered
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite
from tensorflow.python.platform import test
class OrderedBijectorTest(test.TestCase):
"""Tests correctness of the ordered transformation."""
def setUp(self):
self._rng = np.random.RandomState(42)
@test_util.run_in_graph_and_eager_modes()
def testBijectorVector(self):
with self.test_session():
ordered = Ordered()
self.assertEqual("ordered", ordered.name)
x = np.asarray([[2., 3, 4], [4., 8, 13]])
y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
self.assertAllClose(y, self.evaluate(ordered.forward(x)))
self.assertAllClose(x, self.evaluate(ordered.inverse(y)))
self.assertAllClose(
np.sum(np.asarray(y)[..., 1:], axis=-1),
self.evaluate(ordered.inverse_log_det_jacobian(y, event_ndims=1)),
atol=0.,
rtol=1e-7)
self.assertAllClose(
self.evaluate(-ordered.inverse_log_det_jacobian(y, event_ndims=1)),
self.evaluate(ordered.forward_log_det_jacobian(x, event_ndims=1)),
atol=0.,
rtol=1e-7)
def testBijectorUnknownShape(self):
with self.test_session():
ordered = Ordered()
self.assertEqual("ordered", ordered.name)
x = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
real_x = np.asarray([[2., 3, 4], [4., 8, 13]])
y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32)
real_y = [[2., 0, 0], [4., np.log(4.), np.log(5.)]]
self.assertAllClose(real_y, ordered.forward(x).eval(
feed_dict={x: real_x}))
self.assertAllClose(real_x, ordered.inverse(y).eval(
feed_dict={y: real_y}))
self.assertAllClose(
np.sum(np.asarray(real_y)[..., 1:], axis=-1),
ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
feed_dict={y: real_y}),
atol=0.,
rtol=1e-7)
self.assertAllClose(
-ordered.inverse_log_det_jacobian(y, event_ndims=1).eval(
feed_dict={y: real_y}),
ordered.forward_log_det_jacobian(x, event_ndims=1).eval(
feed_dict={x: real_x}),
atol=0.,
rtol=1e-7)
@test_util.run_in_graph_and_eager_modes()
def testShapeGetters(self):
with self.test_session():
x = tensor_shape.TensorShape([4])
y = tensor_shape.TensorShape([4])
bijector = Ordered(validate_args=True)
self.assertAllEqual(y, bijector.forward_event_shape(x))
self.assertAllEqual(y.as_list(),
self.evaluate(bijector.forward_event_shape_tensor(
x.as_list())))
self.assertAllEqual(x, bijector.inverse_event_shape(y))
self.assertAllEqual(x.as_list(),
self.evaluate(bijector.inverse_event_shape_tensor(
y.as_list())))
def testBijectiveAndFinite(self):
with self.test_session():
ordered = Ordered()
x = np.sort(self._rng.randn(3, 10), axis=-1).astype(np.float32)
y = (self._rng.randn(3, 10)).astype(np.float32)
assert_bijective_and_finite(ordered, x, y, event_ndims=1)
if __name__ == "__main__":
test.main()

View File

@@ -30,6 +30,7 @@
@@Invert
@@Kumaraswamy
@@MaskedAutoregressiveFlow
@@Ordered
@@Permute
@@PowerTransform
@@RealNVP
@@ -67,6 +68,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.inline import *
from tensorflow.contrib.distributions.python.ops.bijectors.invert import *
from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import *
from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import *
from tensorflow.contrib.distributions.python.ops.bijectors.ordered import *
from tensorflow.contrib.distributions.python.ops.bijectors.permute import *
from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import *
from tensorflow.contrib.distributions.python.ops.bijectors.real_nvp import *

View File

@@ -170,7 +170,7 @@ class CholeskyOuterProduct(bijector.Bijector):
sum_weighted_log_diag = array_ops.squeeze(
math_ops.matmul(math_ops.log(diag),
exponents[..., array_ops.newaxis]),
axis=-1)
fldj = p_float * np.log(2.) + sum_weighted_log_diag
return fldj

View File

@@ -18,14 +18,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.ops.distributions import bijector
__all__ = [
"Invert",
]
class Invert(bijector.Bijector):
"""Bijector which inverts another Bijector.
Example Use: [ExpGammaDistribution (see Background & Context)](

View File

@@ -32,7 +32,7 @@ from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops import variable_scope as variable_scope_lib
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -42,7 +42,7 @@ __all__ = [
]
class MaskedAutoregressiveFlow(bijector.Bijector):
"""Affine MaskedAutoregressiveFlow bijector for vector-valued events.
The affine autoregressive flow [(Papamakarios et al., 2016)][3] provides a

View File

@@ -0,0 +1,125 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ordered bijector."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
"Ordered",
]
class Ordered(bijector.Bijector):
"""Bijector which maps a tensor x_k that has increasing elements in the last
dimension to an unconstrained tensor y_k.
Both the domain and the codomain of the mapping are [-inf, inf]; however,
the input of the forward mapping must be strictly increasing.
The inverse of the bijector applied to a normal random vector `y ~ N(0, 1)`
gives back a sorted random vector with the same distribution `x ~ N(0, 1)`,
where `x = sort(y)`.
On the last dimension of the tensor, the Ordered bijector performs:
`y[0] = x[0]`
`y[1:] = math_ops.log(x[1:] - x[:-1])`
#### Example Use:
```python
bijector.Ordered().forward([2, 3, 4])
# Result: [2., 0., 0.]
bijector.Ordered().inverse([0.06428002, -1.07774478, -0.71530371])
# Result: [0.06428002, 0.40464228, 0.8936858]
```
"""
def __init__(self, validate_args=False, name="ordered"):
super(Ordered, self).__init__(
forward_min_event_ndims=1,
validate_args=validate_args,
name=name)
def _forward_event_shape(self, input_shape):
if input_shape.ndims is None or input_shape[-1] is None:
return input_shape
return tensor_shape.TensorShape([input_shape[-1]])
def _forward_event_shape_tensor(self, input_shape):
return (input_shape[-1])[..., array_ops.newaxis]
def _inverse_event_shape(self, output_shape):
if output_shape.ndims is None or output_shape[-1] is None:
return output_shape
if output_shape[-1] <= 1:
raise ValueError("output_shape[-1] = %d <= 1" % output_shape[-1])
return tensor_shape.TensorShape([output_shape[-1]])
def _inverse_event_shape_tensor(self, output_shape):
if self.validate_args:
is_greater_one = check_ops.assert_greater(
output_shape[-1], 1, message="Need last dimension greater than 1.")
output_shape = control_flow_ops.with_dependencies(
[is_greater_one], output_shape)
return (output_shape[-1])[..., array_ops.newaxis]
def _forward(self, x):
x = self._maybe_assert_valid_x(x)
y0 = x[..., 0, array_ops.newaxis]
yk = math_ops.log(x[..., 1:] - x[..., :-1])
y = array_ops.concat([y0, yk], axis=-1)
return y
def _inverse(self, y):
x0 = y[..., 0, array_ops.newaxis]
xk = math_ops.exp(y[..., 1:])
x = array_ops.concat([x0, xk], axis=-1)
return math_ops.cumsum(x, axis=-1)
def _inverse_log_det_jacobian(self, y):
# The Jacobian of the inverse mapping is lower
# triangular, with the diagonal elements being:
# J[i,i] = 1 if i=1, and
# exp(y_i) if 1<i<=K
# which gives the absolute Jacobian determinant:
# |det(Jac)| = prod_{i=1}^{K} exp(y[i]).
# (1) - Stan Modeling Language User's Guide and Reference Manual
# Version 2.17.0, section 35.2
return math_ops.reduce_sum(y[..., 1:], axis=-1)
def _forward_log_det_jacobian(self, x):
x = self._maybe_assert_valid_x(x)
return -math_ops.reduce_sum(
math_ops.log(x[..., 1:] - x[..., :-1]),
axis=-1)
def _maybe_assert_valid_x(self, x):
if not self.validate_args:
return x
is_valid = check_ops.assert_positive(
x[..., 1:] - x[..., :-1],
message="Forward transformation input must be strictly increasing.")
return control_flow_ops.with_dependencies([is_valid], x)
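A quick NumPy sanity check of the forward/inverse pair defined above (a sketch, independent of the TF implementation):

```python
import numpy as np

def ordered_forward(x):
  # y[0] = x[0]; y[1:] = log(x[1:] - x[:-1])
  return np.concatenate([x[:1], np.log(np.diff(x))])

def ordered_inverse(y):
  # cumsum([y[0], exp(y[1:])]) recovers the strictly increasing vector.
  return np.cumsum(np.concatenate([y[:1], np.exp(y[1:])]))

x = np.array([2., 3., 4.])
print(ordered_forward(x))  # [2. 0. 0.]
assert np.allclose(ordered_inverse(ordered_forward(x)), x)
```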

View File

@@ -28,7 +28,7 @@ from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -36,7 +36,7 @@ __all__ = [
]
class Permute(bijector.Bijector):
"""Permutes the rightmost dimension of a `Tensor`.
```python

View File

@@ -25,7 +25,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import template as template_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -34,7 +34,7 @@ __all__ = [
]
class RealNVP(bijector.Bijector):
"""RealNVP "affine coupling layer" for vector-valued events.
Real NVP models a normalizing flow on a `D`-dimensional distribution via a

View File

@@ -28,7 +28,7 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops.distributions import bijector
__all__ = [
@@ -44,7 +44,7 @@ def _ndims_from_shape(shape):
return array_ops.shape(shape)[0]
class Reshape(bijector.Bijector):
"""Reshapes the `event_shape` of a `Tensor`.
The semantics generally follow that of `tf.reshape()`, with

View File

@@ -128,7 +128,7 @@ class Weibull(bijector.Bijector):
return x
is_valid = check_ops.assert_non_negative(
x,
message="Forward transformation input must be at least 0.")
return control_flow_ops.with_dependencies([is_valid], x)
def _maybe_assert_valid_y(self, y):

View File

@@ -439,7 +439,7 @@ class _DistributionShape(object):
if self._batch_ndims_is_0 and expand_batch_dim:
squeeze_dims += [1]
if squeeze_dims:
x = array_ops.squeeze(x, axis=squeeze_dims)
# x.shape: [prod(S)]+B+E
_, batch_shape, event_shape = self.get_shape(x)
else:

View File

@@ -397,7 +397,7 @@ class GmmAlgorithm(object):
# Compute the effective number of data points assigned to component k.
with ops.control_dependencies(self._w):
points_in_k = array_ops.squeeze(
math_ops.add_n(self._points_in_k), axis=[0])
# Update alpha.
if 'w' in self._params:
final_points_in_k = points_in_k / num_batches

View File

@@ -932,7 +932,8 @@ def convolution(inputs,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None,
conv_dims=None):
"""Adds an N-D convolution followed by an optional batch_norm layer.
It is required that 1 <= N <= 3.
@@ -993,6 +994,10 @@ def convolution(inputs,
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for `variable_scope`.
conv_dims: Optional convolution dimensionality. When set, the
corresponding convolution is used (e.g. 2 for Conv 2D, 3 for Conv 3D,
...). When left as None, the dimensionality is selected based on the
input rank (i.e. Conv ND, with N = input_rank - 2).
Returns:
A tensor representing the output of the operation.
@@ -1015,6 +1020,9 @@ def convolution(inputs,
inputs = ops.convert_to_tensor(inputs)
input_rank = inputs.get_shape().ndims
if conv_dims is not None and conv_dims + 2 != input_rank:
raise ValueError('Convolution expects input with rank %d, got %d' %
(conv_dims + 2, input_rank))
if input_rank == 3:
layer_class = convolutional_layers.Convolution1D
elif input_rank == 4:
@@ -1061,10 +1069,134 @@ def convolution(inputs,
outputs = activation_fn(outputs)
return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
@add_arg_scope
def convolution1d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=1)
convolution1d.__doc__ = convolution.__doc__
@add_arg_scope
def convolution2d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=2)
convolution2d.__doc__ = convolution.__doc__
@add_arg_scope
def convolution3d(inputs,
num_outputs,
kernel_size,
stride=1,
padding='SAME',
data_format=None,
rate=1,
activation_fn=nn.relu,
normalizer_fn=None,
normalizer_params=None,
weights_initializer=initializers.xavier_initializer(),
weights_regularizer=None,
biases_initializer=init_ops.zeros_initializer(),
biases_regularizer=None,
reuse=None,
variables_collections=None,
outputs_collections=None,
trainable=True,
scope=None):
return convolution(inputs,
num_outputs,
kernel_size,
stride,
padding,
data_format,
rate,
activation_fn,
normalizer_fn,
normalizer_params,
weights_initializer,
weights_regularizer,
biases_initializer,
biases_regularizer,
reuse,
variables_collections,
outputs_collections,
trainable,
scope,
conv_dims=3)
convolution3d.__doc__ = convolution.__doc__
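A hedged usage sketch of the explicit-rank wrappers defined above (assumes the `tf.contrib.layers` import path of this era):

```python
import tensorflow as tf
from tensorflow.contrib import layers

images = tf.random_uniform([5, 7, 9, 3])      # rank 4 -> Conv 2D
net2d = layers.convolution2d(images, num_outputs=32, kernel_size=3)

volumes = tf.random_uniform([5, 6, 7, 9, 3])  # rank 5 -> Conv 3D
net3d = layers.convolution3d(volumes, num_outputs=32, kernel_size=3)

# With conv_dims now checked, a mismatched rank fails fast:
# layers.convolution2d(volumes, 32, 3)  # ValueError: expects rank 4, got 5
```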
@add_arg_scope
def convolution2d_in_plane(
@@ -1411,7 +1543,7 @@ def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None):
Args:
tensor: An `int` `Tensor` to be converted to a `Sparse`.
eos_token: An integer.
It is part of the target label that signifies the end of a sentence.
outputs_collections: Collection to add the outputs.
scope: Optional scope for name_scope.
"""
@@ -1555,7 +1687,7 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None):
output_collections: Collection to which the outputs will be added.
scope: Optional scope for `name_scope`.
Returns:
A `Tensor` or `SparseTensor` containing the same values as `inputs`, but
with innermost dimensions flattened to obtain rank `new_rank`.
Raises:

View File

@@ -310,6 +310,17 @@ class BiasAddTest(test.TestCase):
class ConvolutionTest(test.TestCase):
def testInvalidShape(self):
with self.test_session():
images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'Convolution expects input with rank 5, got 4'):
layers_lib.convolution3d(images_2d, 32, 3)
images_3d = random_ops.random_uniform((5, 6, 7, 9, 3), seed=1)
with self.assertRaisesRegexp(
ValueError, 'Convolution expects input with rank 4, got 5'):
layers_lib.convolution2d(images_3d, 32, 3)
def testInvalidDataFormat(self):
height, width = 7, 9
with self.test_session():
@@ -3155,7 +3166,7 @@ class RepeatTests(test.TestCase):
with self.test_session():
images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32)
output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3])
self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 32])
def testRepeatWithScope(self):
@@ -3749,7 +3760,7 @@ class StackTests(test.TestCase):
layers_lib.convolution2d, [10, 20, 30],
kernel_size=[3, 3],
padding='SAME')
self.assertEqual(output.op.name, 'Stack/convolution2d_3/Relu')
self.assertListEqual(output.get_shape().as_list(), [5, 3, 3, 30])
def testStackWithScope(self):

View File

@@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn):
def logits_to_predictions(self, logits, proba=False):
if self.num_label_columns == 1:
return array_ops.squeeze(logits, axis=[1])
return logits
def get_eval_ops(self, features, logits, labels, metrics=None):
@@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, target):
"Instead got %s." % target.dtype)
# sparse_softmax_cross_entropy_with_logits requires [batch_size] target.
if len(target.get_shape()) == 2:
target = array_ops.squeeze(target, axis=[1])
loss_vec = nn.sparse_softmax_cross_entropy_with_logits(
labels=target, logits=logits)
return loss_vec

View File

@@ -777,7 +777,7 @@ class _RegressionHead(_SingleHead):
key = prediction_key.PredictionKey.SCORES
with ops.name_scope(None, "predictions", (logits,)):
if self.logits_dimension == 1:
logits = array_ops.squeeze(logits, axis=(1,), name=key)
return {key: self._link_fn(logits)}
def _metrics(self, eval_loss, predictions, labels, weights):
@@ -974,7 +974,7 @@ def _softmax_cross_entropy_loss(labels, logits, weights=None):
is_squeezed_labels = False
# TODO(ptucker): This will break for dynamic shapes.
if len(labels.get_shape()) == 2:
labels = array_ops.squeeze(labels, axis=(1,))
is_squeezed_labels = True
loss = nn.sparse_softmax_cross_entropy_with_logits(

View File

@@ -40,7 +40,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None):
[tensor_in, labels]):
predictions = nn.xw_plus_b(tensor_in, weights, biases)
if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2:
predictions = array_ops_.squeeze(predictions, axis=[1])
return predictions, losses.mean_squared_error(labels, predictions)

View File

@@ -17,6 +17,7 @@ limitations under the License.
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
@@ -70,6 +71,23 @@ TfLiteStatus ReadLabelsFile(const string& file_name,
return kTfLiteOk;
}
void PrintProfilingInfo(const profiling::ProfileEvent* e, uint32_t op_index,
TfLiteRegistration registration) {
// Output something like:
// time (ms), Node xxx, OpCode xxx, symbolic name
// 5.352, Node 5, OpCode 4, DEPTHWISE_CONV_2D
LOG(INFO) << std::fixed << std::setw(10) << std::setprecision(3)
<< (e->end_timestamp_us - e->begin_timestamp_us) / 1000.0
<< ", Node " << std::setw(3) << std::setprecision(3) << op_index
<< ", OpCode " << std::setw(3) << std::setprecision(3)
<< registration.builtin_code << ", "
<< EnumNameBuiltinOperator(
(BuiltinOperator)registration.builtin_code)
<< "\n";
}
void RunInference(Settings* s) {
if (!s->model_name.c_str()) {
LOG(ERROR) << "no model file name\n";
@@ -166,6 +184,11 @@ void RunInference(Settings* s) {
exit(-1);
}
profiling::Profiler* profiler = new profiling::Profiler();
interpreter->SetProfiler(profiler);
if (s->profiling) profiler->StartProfiling();
struct timeval start_time, stop_time;
gettimeofday(&start_time, NULL);
for (int i = 0; i < s->loop_count; i++) {
@@ -179,6 +202,18 @@ void RunInference(Settings* s) {
<< (get_us(stop_time) - get_us(start_time)) / (s->loop_count * 1000)
<< " ms \n";
if (s->profiling) {
profiler->StopProfiling();
auto profile_events = profiler->GetProfileEvents();
for (int i = 0; i < profile_events.size(); i++) {
auto op_index = profile_events[i]->event_metadata;
const auto node_and_registration =
interpreter->node_and_registration(op_index);
const TfLiteRegistration registration = node_and_registration->second;
PrintProfilingInfo(profile_events[i], op_index, registration);
}
}
const int output_size = 1000;
const size_t num_results = 5;
const float threshold = 0.001f;
@@ -217,13 +252,14 @@ void RunInference(Settings* s) {
void display_usage() {
LOG(INFO) << "label_image\n"
<< "--accelerated, -a: [0|1], use Android NNAPI or not\n"
<< "--count, -c: loop interpreter->Invoke() for certain times\n"
<< "--input_mean, -b: input mean\n"
<< "--input_std, -s: input standard deviation\n"
<< "--image, -i: image_name.bmp\n"
<< "--labels, -l: labels for the model\n"
<< "--tflite_model, -m: model_name.tflite\n"
<< "--profiling, -p: [0|1], profiling or not\n"
<< "--threads, -t: number of threads\n" << "--threads, -t: number of threads\n"
<< "--verbose, -v: [0|1] print more information\n" << "--verbose, -v: [0|1] print more information\n"
<< "\n"; << "\n";
@ -241,6 +277,7 @@ int Main(int argc, char** argv) {
{"image", required_argument, 0, 'i'}, {"image", required_argument, 0, 'i'},
{"labels", required_argument, 0, 'l'}, {"labels", required_argument, 0, 'l'},
{"tflite_model", required_argument, 0, 'm'}, {"tflite_model", required_argument, 0, 'm'},
{"profiling", required_argument, 0, 'p'},
{"threads", required_argument, 0, 't'}, {"threads", required_argument, 0, 't'},
{"input_mean", required_argument, 0, 'b'}, {"input_mean", required_argument, 0, 'b'},
{"input_std", required_argument, 0, 's'}, {"input_std", required_argument, 0, 's'},
@ -249,7 +286,7 @@ int Main(int argc, char** argv) {
/* getopt_long stores the option index here. */ /* getopt_long stores the option index here. */
int option_index = 0; int option_index = 0;
c = getopt_long(argc, argv, "a:b:c:f:i:l:m:s:t:v:", long_options, c = getopt_long(argc, argv, "a:b:c:f:i:l:m:p:s:t:v:", long_options,
&option_index); &option_index);
/* Detect the end of the options. */ /* Detect the end of the options. */
@ -276,6 +313,10 @@ int Main(int argc, char** argv) {
case 'm': case 'm':
s.model_name = optarg; s.model_name = optarg;
break; break;
case 'p':
s.profiling = strtol( // NOLINT(runtime/deprecated_fn)
optarg, (char**)NULL, 10);
break;
case 's':
s.input_std = strtod(optarg, NULL);
break;

View File

@@ -25,6 +25,7 @@ struct Settings {
bool verbose = false;
bool accel = false;
bool input_floating = false;
bool profiling = false;
int loop_count = 1;
float input_mean = 127.5f;
float input_std = 127.5f;

View File

@@ -84,4 +84,32 @@
android:visibility="visible" />
</RelativeLayout>
<RelativeLayout
android:id="@+id/control2"
android:layout_width="match_parent"
android:layout_height="135dp"
android:layout_alignParentLeft="true"
android:layout_alignParentStart="true"
android:layout_alignTop="@+id/control"
android:layout_marginLeft="300dp"
android:layout_marginStart="300dp"
android:background="@color/control_background">
<ToggleButton
android:id="@+id/button"
android:textOff="@string/tflite"
android:textOn="@string/nnapi"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_alignParentLeft="true"
android:layout_alignParentStart="true" />
<NumberPicker
android:id="@+id/np"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_below="@+id/button"
android:visibility="visible" />
</RelativeLayout>
</RelativeLayout>

View File

@@ -25,8 +25,8 @@ namespace builtin {
namespace topk_v2 {
constexpr int kInputTensor = 0;
constexpr int kInputTopK = 1;
constexpr int kOutputValues = 0;
constexpr int kOutputIndexes = 1;
namespace {
TfLiteStatus ResizeOutput(TfLiteContext* context, TfLiteNode* node) {
View File

@@ -31,8 +31,8 @@ class TopKV2OpModel : public SingleOpModel {
int top_k) {
input_ = AddInput(input_type);
top_k_ = AddInput(TensorType_INT32);
output_values_ = AddOutput(input_type);
output_indexes_ = AddOutput(TensorType_INT32);
SetBuiltinOp(BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options, 0);
BuildInterpreter({input_shape, {1}});
PopulateTensor<int32_t>(top_k_, {top_k});
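The swap above makes the TFLite kernel emit values first and indices second, matching TensorFlow's `tf.nn.top_k` convention; a NumPy sketch of that output order:

```python
import numpy as np

def top_k(x, k):
  # Returns (values, indices): values at output 0, indices at output 1,
  # the ordering the kernel above now uses.
  idx = np.argsort(-x)[:k]
  return x[idx], idx.astype(np.int32)

values, indices = top_k(np.array([1., 9., 4., 7.]), k=2)
print(values)   # [9. 7.]
print(indices)  # [1 3]
```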

View File

@@ -609,7 +609,7 @@ enum {
* Long short-term memory unit (LSTM) recurrent network layer.
*
* The default non-peephole implementation is based on:
* http://www.bioinf.jku.at/publications/older/2604.pdf
* S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural
* Computation, 9(8):1735-1780, 1997.
*

View File

@@ -37,9 +37,9 @@ struct ProfileEvent {
// Label of the event. This usually describes the event.
const char* tag;
// Timestamp in microseconds when the event began.
uint64_t begin_timestamp_us;
// Timestamp in microseconds when the event ended.
uint64_t end_timestamp_us;
// The field containing the type of event. This must be one of the event types
// in EventType.
EventType event_type;
@@ -74,7 +74,7 @@ class ProfileBuffer {
if (!enabled_) {
return kInvalidEventHandle;
}
uint64_t timestamp = NowMicros();
int index = current_index_ % event_buffer_.size();
event_buffer_[index].tag = tag;
event_buffer_[index].event_type = event_type;
@@ -134,7 +134,7 @@ class ProfileBuffer {
}
private:
static uint64_t NowMicros() {
// TODO(shashishekhar): Refactor this to a separate file.
struct timeval tv;
gettimeofday(&tv, nullptr);

View File

@@ -124,6 +124,15 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) {
SetDataTypeForAllOutputs(model, op, rand_op->dtype);
break;
}
case OperatorType::kTopK_V2: {
// topk(values: T, k: int32) -> values: T, indices: int32
CHECK_EQ(op->inputs.size(), 2);
CHECK_EQ(op->outputs.size(), 2);
CHECK(model->GetArray(op->inputs[1]).data_type == ArrayDataType::kInt32);
model->GetArray(op->outputs[0]).data_type =
model->GetArray(op->inputs[0]).data_type;
model->GetArray(op->outputs[1]).data_type = ArrayDataType::kInt32;
break;
}
case OperatorType::kTensorFlowUnsupported: {
auto* unsupported_op = static_cast<TensorFlowUnsupportedOperator*>(op);
// Some output tensors from the op could be eliminated by optimization.

View File

@@ -1087,8 +1087,8 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) {
void ProcessTopkV2Operator(Model* model, TopKV2Operator* op) {
const auto& input_values = model->GetArray(op->inputs[0]);
const auto& input_k = model->GetArray(op->inputs[1]);
auto& output_values = model->GetArray(op->outputs[0]);
auto& output_indexes = model->GetArray(op->outputs[1]);
// Bail if we already know the output shape.
if (output_indexes.has_shape()) {
View File

@ -1991,7 +1991,7 @@ void ConvertTopKV2Operator(const NodeDef& node,
op->inputs.push_back(node.input(1)); op->inputs.push_back(node.input(1));
} }
// The op has two outputs. // The op has two outputs.
op->outputs.push_back(node.name() + ":0"); op->outputs.push_back(node.name());
op->outputs.push_back(node.name() + ":1"); op->outputs.push_back(node.name() + ":1");
model->operators.emplace_back(op.release()); model->operators.emplace_back(op.release());
} }

View File

@@ -825,11 +825,6 @@ void FixNoOrphanedArray(Model* model) {
void CheckEachArray(const Model& model) {
for (const auto& array_entry : model.GetArrayMap()) {
const auto& array = array_entry.second;
// It's OK to have a buffer or an alloc, but not both.
// (Since allocs are for transient arrays without a buffer).
CHECK(!array->buffer || !array->alloc);
@@ -839,6 +834,10 @@ void CheckEachArray(const Model& model) {
// The presence of a fixed buffer should imply the presence of a fixed
// shape.
CHECK(array->has_shape());
// A constant buffer should have a valid shape.
for (int d : array->shape().dims()) {
CHECK_GE(d, 1);
}
// The shape flat-size should agree with the buffer length.
CHECK_EQ(array->buffer->Length(),
RequiredBufferSizeForShape(array->shape()));

View File

@@ -22,6 +22,7 @@ limitations under the License.
#include <string>
#include <vector>
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/lib/strings/str_util.h"
// Skip MPI C++ bindings support, this matches the usage in other places

View File

@@ -56,21 +56,21 @@ class LazyAdamOptimizer(adam.AdamOptimizer):
epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
# \\(m := beta1 * m + (1 - beta1) * g_t\\)
m = self.get_slot(var, "m")
m_t = state_ops.scatter_update(m, grad.indices,
beta1_t * array_ops.gather(m, grad.indices) +
(1 - beta1_t) * grad.values,
use_locking=self._use_locking)
# \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\)
v = self.get_slot(var, "v")
v_t = state_ops.scatter_update(v, grad.indices,
beta2_t * array_ops.gather(v, grad.indices) +
(1 - beta2_t) * math_ops.square(grad.values),
use_locking=self._use_locking)
# \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\)
m_t_slice = array_ops.gather(m_t, grad.indices)
v_t_slice = array_ops.gather(v_t, grad.indices)
denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t

View File

@@ -40,23 +40,19 @@ class AdamOptimizer(optimizer_v2.OptimizerV2):
Initialization:
$$m_0 := 0 (Initialize initial 1st moment vector)$$
$$v_0 := 0 (Initialize initial 2nd moment vector)$$
$$t := 0 (Initialize timestep)$$
The update rule for `variable` with gradient `g` uses an optimization
described at the end of section 2 of the paper:
$$t := t + 1$$
$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
The default value of 1e-8 for epsilon might not be a good default in
general. For example, when training an Inception network on ImageNet a
View File

@@ -307,6 +307,21 @@ class LSTMTest(test.TestCase):
self._seed = 23489
np.random.seed(self._seed)
def testDType(self):
# Test case for GitHub issue 16228
# Not passing dtype in constructor results in default float32
lstm = rnn_cell.LSTMCell(10)
input_tensor = array_ops.ones([10, 50])
lstm.build(input_tensor.get_shape())
self.assertEqual(lstm._bias.dtype, dtypes.float32_ref)
# Explicitly pass dtype in constructor
for dtype in [dtypes.float16, dtypes.float32, dtypes.float64]:
lstm = rnn_cell.LSTMCell(10, dtype=dtype)
input_tensor = array_ops.ones([10, 50])
lstm.build(input_tensor.get_shape())
self.assertEqual(lstm._bias.dtype, dtype._as_ref)
def testNoProjNoSharding(self): def testNoProjNoSharding(self):
num_units = 3 num_units = 3
input_size = 5 input_size = 5

View File

@@ -37,7 +37,7 @@ def _top_k_generator(k):
   def _top_k(probabilities, targets):
     targets = math_ops.to_int32(targets)
     if targets.get_shape().ndims > 1:
-      targets = array_ops.squeeze(targets, squeeze_dims=[1])
+      targets = array_ops.squeeze(targets, axis=[1])
     return metric_ops.streaming_mean(nn.in_top_k(probabilities, targets, k))
   return _top_k

@@ -57,7 +57,7 @@ def _r2(probabilities, targets, weights=None):
 def _squeeze_and_onehot(targets, depth):
-  targets = array_ops.squeeze(targets, squeeze_dims=[1])
+  targets = array_ops.squeeze(targets, axis=[1])
   return array_ops.one_hot(math_ops.to_int32(targets), depth)
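This rename (here and in the files below) swaps the deprecated `squeeze_dims` keyword for `axis`; the behavior is unchanged. A quick NumPy analogue of what `squeeze(..., axis=[1])` does:

```python
import numpy as np

targets = np.zeros((4, 1))              # shape (4, 1)
squeezed = np.squeeze(targets, axis=1)  # drop the size-1 dimension
assert squeezed.shape == (4,)
```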

View File

@@ -55,7 +55,7 @@ class ManyToOneLayer(hybrid_layer.HybridLayer):
     # There is always one activation per instance by definition, so squeeze
     # away the extra dimension.
-    return array_ops.squeeze(nn_activations, squeeze_dims=[1])
+    return array_ops.squeeze(nn_activations, axis=[1])

 class FlattenedFullyConnectedLayer(hybrid_layer.HybridLayer):

View File

@@ -445,7 +445,7 @@ class RandomForestGraphs(object):
       mask = math_ops.less(
           r, array_ops.ones_like(r) * self.params.bagging_fraction)
       gather_indices = array_ops.squeeze(
-          array_ops.where(mask), squeeze_dims=[1])
+          array_ops.where(mask), axis=[1])
       # TODO(thomaswc): Calculate out-of-bag data and labels, and store
       # them for use in calculating statistics later.
       tree_data = array_ops.gather(processed_dense_features, gather_indices)

View File

@@ -111,20 +111,22 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph,
   }
 }

-std::pair<string, int> ParseTensorName(string name, int default_idx = 0) {
+std::pair<string, int> ParseTensorName(const string& name,
+                                       int default_idx = 0) {
+  string name_no_idx = name;
   int idx = default_idx;
-  size_t sep = name.find_last_of(':');
+  const size_t sep = name_no_idx.find_last_of(':');
   if (sep != string::npos) {
-    name = name.substr(0, sep);
+    name_no_idx = name_no_idx.substr(0, sep);
     idx = std::stoi(name.substr(sep + 1));
   }
-  return std::make_pair(name, idx);
+  return std::make_pair(name_no_idx, idx);
 }

 std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
     const std::vector<string>& tensor_names) {
   std::unordered_map<string, std::vector<int>> result;
-  for (string const& tensor_name : tensor_names) {
+  for (const string& tensor_name : tensor_names) {
     string node_name;
     int index;
     std::tie(node_name, index) = ParseTensorName(tensor_name);

@@ -132,6 +134,7 @@ std::unordered_map<string, std::vector<int>> BuildTensorNameMap(
   }
   return result;
 }

+// TODO(sami): convert references to pointers
 struct ConvertGraphParams {
   ConvertGraphParams(

View File

@@ -78,7 +78,7 @@ class StubTimeSeriesModel(model.TimeSeriesModel):
     batch_end_values = array_ops.squeeze(
         array_ops.slice(values, [0, array_ops.shape(times)[1] - 1, 0],
                         [-1, 1, -1]),
-        squeeze_dims=[1, 2])
+        axis=[1, 2])
     # A pretty odd but easy to think about loss: L1 loss on the batch end
     # values.
     loss = math_ops.reduce_sum(

View File

@@ -170,7 +170,7 @@ class KalmanFilter(object):
         math_ops.matmul(
             transition_matrices,
             prior_state[..., None]),
-        squeeze_dims=[-1])
+        axis=[-1])
     return advanced_state

   def predict_state_var(

@@ -254,7 +254,7 @@ class KalmanFilter(object):
             kalman_gain_transposed,
             array_ops.expand_dims(residual, -1),
             adjoint_a=True),
-        squeeze_dims=[-1])
+        axis=[-1])
     gain_obs = math_ops.matmul(
         kalman_gain_transposed, observation_model, adjoint_a=True)
     identity_extradim = linalg_ops.eye(

@@ -332,7 +332,7 @@ class KalmanFilter(object):
             array_ops.expand_dims(state_mean, 1),
             observation_model,
             adjoint_b=True),
-        squeeze_dims=[1])
+        axis=[1])
     observed_var = math_ops.matmul(
         math_ops.matmul(observation_model, state_var),
         observation_model,
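The expand-then-squeeze pattern in these hunks is the usual trick for batched matrix-vector products. A minimal NumPy sketch of the state-prediction step (shapes are illustrative):

```python
import numpy as np

# Batch of 4 systems with a 3-dimensional state each.
transition_matrices = np.random.randn(4, 3, 3)
prior_state = np.random.randn(4, 3)

# Add a trailing length-1 axis so batched matmul sees column vectors,
# then squeeze it away again, as in the code above.
advanced_state = np.squeeze(
    transition_matrices @ prior_state[..., None], axis=-1)
assert advanced_state.shape == (4, 3)
```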

View File

@@ -2292,7 +2292,9 @@ tf_cuda_library(
 CORE_CPU_BASE_HDRS = GRAPH_HDRS + [
     "common_runtime/device.h",
+    "common_runtime/device_factory.h",
     "common_runtime/device_mgr.h",
+    "common_runtime/device_set.h",
     "common_runtime/eval_const_tensor.h",
     "common_runtime/graph_runner.h",
     "common_runtime/shape_refiner.h",

@@ -2350,9 +2352,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [
     "common_runtime/copy_tensor.h",
     "common_runtime/costmodel_manager.h",
     "common_runtime/debugger_state_interface.h",
-    "common_runtime/device_factory.h",
     "common_runtime/device_resolver_local.h",
-    "common_runtime/device_set.h",
     "common_runtime/dma_helper.h",
     "common_runtime/eigen_thread_pool.h",
     "common_runtime/executor.h",

View File

@@ -82,9 +82,9 @@ END
   }
   summary: "Update \'*var\' according to the Adam algorithm."
   description: <<END
-lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
+$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
+$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
 END
 }

View File

@@ -24,5 +24,6 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                       [0, 0, 2, 2, 0, 0]
                       [0, 0, 0, 0, 0, 0]]
 ```
+
 END
 }

View File

@@ -44,6 +44,7 @@ In 'MIN_COMBINED' mode, each value of the tensor will undergo the following:
 out[i] = (in[i] - min_range) * range(T) / (max_range - min_range)
 if T == qint8, out[i] -= (range(T) + 1) / 2.0
 ```
+
 here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()`

 *MIN_COMBINED Mode Example*

@@ -87,6 +88,7 @@ choosing to elide the lowest possible value for symmetry (e.g., output range is
 We first find the range of values in our tensor. The
 range we use is always centered on 0, so we find m such that
+
 ```c++
 m = max(abs(input_min), abs(input_max))
 ```

@@ -95,6 +97,7 @@ Our input tensor range is then `[-m, m]`.
 Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`.
 If T is signed, this is
+
 ```
 num_bits = sizeof(T) * 8
 [min_fixed, max_fixed] =

@@ -102,16 +105,19 @@ If T is signed, this is
 ```

 Otherwise, if T is unsigned, the fixed-point range is
+
 ```
 [min_fixed, max_fixed] = [0, (1 << num_bits) - 1]
 ```

 From this we compute our scaling factor, s:
+
 ```c++
 s = (max_fixed - min_fixed) / (2 * m)
 ```

 Now we can quantize the elements of our tensor:
+
 ```c++
 result = round(input * s)
 ```
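Putting the steps of this centered-scaling mode together in a small NumPy sketch (the function name is illustrative; assumes the signed/unsigned ranges quoted above):

```python
import numpy as np

def quantize_centered(x, input_min, input_max, num_bits=8, signed=True):
  m = max(abs(input_min), abs(input_max))        # range is [-m, m]
  if signed:
    min_fixed = -(1 << (num_bits - 1))
    max_fixed = (1 << (num_bits - 1)) - 1
  else:
    min_fixed, max_fixed = 0, (1 << num_bits) - 1
  s = (max_fixed - min_fixed) / (2.0 * m)        # scaling factor
  return np.round(np.asarray(x) * s)

quantize_centered([-10.0, 0.0, 10.0], input_min=-10.0, input_max=10.0)
# -> array([-128., 0., 128.])
```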

View File

@@ -76,9 +76,9 @@ END
   }
   summary: "Update \'*var\' according to the Adam algorithm."
   description: <<END
-lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
-m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
-v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
-variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
+$$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$
+$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
 END
 }

View File

@@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according
 to the indices.
 END
   }
-  summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`."
+  summary: "Scatter `updates` into a new tensor according to `indices`."
   description: <<END
-Creates a new tensor by applying sparse `updates` to individual
-values or slices within a zero tensor of the given `shape` according to
-indices. This operator is the inverse of the @{tf.gather_nd} operator which
-extracts values or slices from a given tensor.
+Creates a new tensor by applying sparse `updates` to individual values or
+slices within a tensor (initially zero for numeric, empty for string) of
+the given `shape` according to indices. This operator is the inverse of the
+@{tf.gather_nd} operator which extracts values or slices from a given tensor.

 **WARNING**: The order in which updates are applied is nondeterministic, so the
 output will be nondeterministic if `indices` contains duplicates.
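For the numeric case, the semantics can be sketched in a few lines of NumPy (hypothetical helper; note the sketch resolves duplicates by last-write-wins, whereas the op leaves duplicate order undefined):

```python
import numpy as np

def scatter_nd_sketch(indices, updates, shape):
  out = np.zeros(shape, dtype=np.asarray(updates).dtype)
  for idx, upd in zip(indices, updates):
    out[tuple(idx)] = upd   # duplicates: last write wins in this sketch only
  return out

scatter_nd_sketch([[4], [3], [1], [7]], [9, 10, 11, 12], [8])
# -> array([ 0, 11,  0, 10,  9,  0,  0, 12])
```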

View File

@@ -490,7 +490,7 @@ Status GraphExecutionState::OptimizeGraph(
       cpu_device = device;
     }
   }
-  grappler::VirtualCluster cluster(device_map);
+  grappler::VirtualCluster cluster(device_map, device_set_);
   GraphDef new_graph;
   TF_RETURN_IF_ERROR(grappler::RunMetaOptimizer(
       item, rewrite_options, cpu_device, &cluster, &new_graph));

View File

@@ -547,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // If Op has been specifically assigned to a non-CPU device, then No.
     if (!n->assigned_device_name().empty() &&
-        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
       result = false;
       reason = "Op has been assigned a runtime device that is not CPU.";
     }

     // If user has specifically assigned this op to a non-CPU device, then No.
     if (!n->def().device().empty() &&
-        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
       result = false;
       reason = "User has assigned a device that is not CPU.";
     }

@@ -2691,14 +2691,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // If Op has been specifically assigned to a non-CPU device, then No.
     if (!n->assigned_device_name().empty() &&
-        !StringPiece(n->assigned_device_name()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) {
      result = false;
      reason = "Op has been assigned a runtime device that is not CPU.";
     }

     // If user has specifically assigned this op to a non-CPU device, then No.
     if (!n->def().device().empty() &&
-        !StringPiece(n->def().device()).contains(kCPUDeviceSubStr)) {
+        !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) {
       result = false;
       reason = "User has assigned a device that is not CPU.";
     }

View File

@@ -56,6 +56,7 @@ cc_library(
     ],
     visibility = ["//visibility:public"],
     deps = [
+        "//tensorflow/core:core_cpu_base",
        "//tensorflow/core:framework",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",

@@ -73,6 +74,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":cluster",
+        "//tensorflow/core:core_cpu_base",
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/grappler/costs:op_level_cost_estimator",

View File

@@ -21,6 +21,7 @@ limitations under the License.
 #include <utility>
 #include <vector>

+#include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/lib/core/status.h"

@@ -92,6 +93,10 @@ class Cluster {
   // sorted alphabetically.
   const std::vector<string> GetDeviceNames() const;

+  // The DeviceSet is not always available, but when it is it contains a
+  // superset of the devices listed in GetDevices/GetDeviceNames().
+  const DeviceSet* GetDeviceSet() const { return device_set_; }
+
   // Enables collecting the allocator stats. Call with enable=true must be made
   // before Provision().
   virtual Status EnablePeakMemoryStats(bool enable) {

@@ -119,6 +124,7 @@ class Cluster {
  protected:
   std::unordered_map<string, DeviceProperties> devices_;
+  const DeviceSet* device_set_ = nullptr;  // Not owned
   const int timeout_s_;
   SessionOptions options_;
   RunOptions run_options_;

View File

@@ -37,6 +37,14 @@ VirtualCluster::VirtualCluster(
     : Cluster(0), node_estimator_(node_estimator), node_manager_(node_manager) {
   devices_ = devices;
 }

+VirtualCluster::VirtualCluster(
+    const std::unordered_map<string, DeviceProperties>& devices,
+    const DeviceSet* device_set)
+    : VirtualCluster(devices) {
+  device_set_ = device_set;
+}
+
 VirtualCluster::~VirtualCluster() {}

 Status VirtualCluster::Provision() { return Status::OK(); }

View File

@@ -17,6 +17,8 @@ limitations under the License.
 #define TENSORFLOW_CORE_GRAPPLER_CLUSTERS_VIRTUAL_CLUSTER_H_

 #include <unordered_map>
+
+#include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/grappler/clusters/cluster.h"
 #include "tensorflow/core/grappler/costs/op_level_cost_estimator.h"
 #include "tensorflow/core/grappler/costs/virtual_scheduler.h"

@@ -34,6 +36,8 @@ class VirtualCluster : public Cluster {
   VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
                  OpLevelCostEstimator* node_estimator,
                  ReadyNodeManager* node_manager);
+  VirtualCluster(const std::unordered_map<string, DeviceProperties>& devices,
+                 const DeviceSet* device_set);

   ~VirtualCluster() override;

View File

@@ -199,7 +199,7 @@ class FirstReadyManager : public ReadyNodeManager {
   // current node.
   std::vector<const NodeDef*> nodes_;
   // Newly added nodes are added to waiting_queue_. That way, GetCurrNode(),
-  // wihch returns the front of the nodes_, always returns the same node,
+  // which returns the front of the nodes_, always returns the same node,
   // even if any of new nodes has time_ready smaller than the current node's.
   std::vector<const NodeDef*> waiting_queue_;
   // Comparator functor for heap; stl heap is max heap, so we use "greater than"

@@ -212,7 +212,7 @@ class FirstReadyManager : public ReadyNodeManager {
 };

 // CompositeNodeManager has a few other NodeManagers: per-device LIFO for normal
-// ops (neither _Send nor _Recv) and FirstyReadyManagers for _Send ops and _Recv
+// ops (neither _Send nor _Recv) and FirstReadyManagers for _Send ops and _Recv
 // ops, and then it chooses FirstReady among the ops chosen from each
 // internal NodeManagers. The objective is to maximize producer-consumer
 // locality within device, while processing nodes across devices, including

View File

@@ -33,7 +33,7 @@ class CustomGraphOptimizerRegistry {
   static std::vector<string> GetRegisteredOptimizers();

   typedef std::function<CustomGraphOptimizer*()> Creator;
-  // Regsiter graph optimizer which can be called during program initialization.
+  // Register graph optimizer which can be called during program initialization.
   // This class is not thread-safe.
   static void RegisterOptimizerOrDie(const Creator& optimizer_creator,
                                      const string& name);

View File

@@ -160,13 +160,26 @@ Status MetaOptimizer::InitializeOptimizersByName(
       VLOG(2) << "Can't register an optimizer by name: " << optimizer_name;
     }
   }
+  for (const auto& optimizer_config : cfg_.custom_optimizers()) {
+    auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull(
+        optimizer_config.name());
+
+    if (custom_optimizer) {
+      VLOG(2) << "Registered custom configurable graph optimizer: "
+              << optimizer_config.name();
+      TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
+      optimizers->push_back(std::move(custom_optimizer));
+    } else {
+      VLOG(2) << "Can't register an optimizer by name: "
+              << optimizer_config.name();
+    }
+  }
   return Status::OK();
 }

 Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item,
                                     GraphDef* optimized_graph) {
   std::vector<std::unique_ptr<GraphOptimizer>> optimizers;
-  if (cfg_.optimizers().empty()) {
+  if (cfg_.optimizers().empty() && cfg_.custom_optimizers().empty()) {
     TF_RETURN_IF_ERROR(InitializeOptimizers(&optimizers));
   } else {
     TF_RETURN_IF_ERROR(InitializeOptimizersByName(&optimizers));

@@ -337,7 +350,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
          cfg.auto_parallel().enable() ||
          cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT ||
          cfg.debug_stripper() == RewriterConfig::ON ||
-         !cfg.optimizers().empty();
+         !cfg.optimizers().empty() || !cfg.custom_optimizers().empty();
 }

 Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg,
View File

@@ -134,6 +134,8 @@ Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index) {
   switch (element.dtype()) {
     TF_CALL_ALL_TYPES(HANDLE_TYPE);
     TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+    TF_CALL_uint32(HANDLE_TYPE);
+    TF_CALL_uint64(HANDLE_TYPE);
 #undef HANDLE_TYPE
     default:
       return errors::Unimplemented("CopyElementToSlice Unhandled data type: ",

View File

@@ -16,8 +16,8 @@ limitations under the License.
 #include "tensorflow/core/kernels/cwise_ops_common.h"

 namespace tensorflow {
-REGISTER5(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
-          int16, int32, int64);
+REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16,
+          int8, int16, int32, int64);
 REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float,
           Eigen::half, double);
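`FloorDiv` on the newly covered signed type uses floored (not truncating) division; NumPy's `floor_divide` shows the expected behavior:

```python
import numpy as np

np.floor_divide(np.int8(-7), np.int8(2))  # -> -4, not -3
```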

View File

@@ -20,6 +20,7 @@ limitations under the License.
 #include <map>
 #include <string>
 #include <vector>
+#include <memory>

 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"

@@ -42,14 +43,13 @@ limitations under the License.
 #include "tensorflow/core/util/mkl_util.h"

 #ifndef INTEL_MKL_ML
 #include "mkldnn.hpp"
 using mkldnn::prop_kind;
 using mkldnn::stream;
-using mkldnn::convolution_direct;
 using mkldnn::convolution_forward;
+using mkldnn::convolution_direct;
 #else
 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
@@ -57,11 +57,232 @@ using mkldnn::convolution_forward;

 namespace tensorflow {

+#ifndef INTEL_MKL_ML
+
+struct ConvFwdDimensions {
+  memory::dims src_dims;
+  memory::dims filter_dims;
+  memory::dims bias_dims;
+  memory::dims dst_dims;
+  memory::dims strides;
+  memory::dims dilations;
+  memory::dims padding_left;
+  memory::dims padding_right;
+
+  ConvFwdDimensions(memory::dims src_dims,
+      memory::dims filter_dims, memory::dims bias_dims,
+      memory::dims dst_dims, memory::dims strides,
+      memory::dims dilations, memory::dims padding_left,
+      memory::dims padding_right) :
+      src_dims(src_dims), filter_dims(filter_dims),
+      bias_dims(bias_dims), dst_dims(dst_dims),
+      strides(strides), dilations(dilations),
+      padding_left(padding_left), padding_right(padding_right) {
+  }
+};
+
+template <typename T>
+class Conv2DFwd : public DnnOp {
+ public:
+  explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) {
+    fwd_stream_.reset(new stream(stream::kind::eager));
+    // create conv primitive
+    if (conv_fwd_ == nullptr) {
+      Setup(convFwdDims);
+    }
+  }
+
+  ~Conv2DFwd() {}
+
+  // Convolution forward execute with bias
+  //   src_data:    input data buffer of src
+  //   filter_data: input data buffer of filter (weights)
+  //   bias_data:   input data buffer of bias
+  //   dst_data:    output data buffer of dst
+  void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) {
+    src_mem_->set_data_handle(static_cast<void*>(src_data));
+    filter_mem_->set_data_handle(static_cast<void*>(filter_data));
+    bias_mem_->set_data_handle(static_cast<void*>(bias_data));
+    dst_mem_->set_data_handle(static_cast<void*>(dst_data));
+    fwd_stream_->submit(fwd_primitives_);
+
+    // after exec, set data handle back
+    src_mem_->set_data_handle(DummyData);
+    filter_mem_->set_data_handle(DummyData);
+    bias_mem_->set_data_handle(DummyData);
+    dst_mem_->set_data_handle(DummyData);
+    return;
+  }
+
+  // Convolution forward execute without bias
+  //   src_data:    input data buffer of src
+  //   filter_data: input data buffer of filter (weights)
+  //   dst_data:    output data buffer of dst
+  void Execute(T* src_data, T* filter_data, T* dst_data) {
+    src_mem_->set_data_handle(static_cast<void*>(src_data));
+    filter_mem_->set_data_handle(static_cast<void*>(filter_data));
+    dst_mem_->set_data_handle(static_cast<void*>(dst_data));
+    fwd_stream_->submit(fwd_primitives_);
+
+    // after exec, set data handle back
+    src_mem_->set_data_handle(DummyData);
+    filter_mem_->set_data_handle(DummyData);
+    dst_mem_->set_data_handle(DummyData);
+    return;
+  }
+
+  // expected memory format for this primitive instance
+  memory::format src_fmt_;
+  memory::format filter_fmt_;
+
+  // convolution primitive
+  std::shared_ptr<mkldnn::convolution_forward::primitive_desc> fwd_pd_;
+  std::shared_ptr<mkldnn::primitive> conv_fwd_;
+
+ private:
+  void Setup(const ConvFwdDimensions& convFwdDims) {
+    // create memory descriptors for convolution data w/ no specified format
+    src_md_.reset(new memory::desc({convFwdDims.src_dims},
+        MklDnnType<T>(), memory::format::any));
+    filter_md_.reset(new memory::desc({convFwdDims.filter_dims},
+        MklDnnType<T>(), memory::format::any));
+    dst_md_.reset(new memory::desc({convFwdDims.dst_dims},
+        MklDnnType<T>(), memory::format::any));
+    if (!convFwdDims.bias_dims.empty())
+      bias_md_.reset(new memory::desc({convFwdDims.bias_dims},
+          MklDnnType<T>(), memory::format::any));
+
+    // create a convolution
+    if (!convFwdDims.bias_dims.empty()) {
+      fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
+          convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_,
+          convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
+          convFwdDims.padding_right, padding_kind::zero));
+    } else {
+      fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward,
+          convolution_direct, *src_md_, *filter_md_, *dst_md_,
+          convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left,
+          convFwdDims.padding_right, padding_kind::zero));
+    }
+
+    fwd_pd_.reset(new convolution_forward::primitive_desc(
+        *fwd_desc_, cpu_engine_));
+
+    // store the expected memory format
+    src_fmt_ = static_cast<mkldnn::memory::format>(
+        fwd_pd_.get()->src_primitive_desc().desc().data.format);
+    filter_fmt_ = static_cast<mkldnn::memory::format>(
+        fwd_pd_.get()->weights_primitive_desc().desc().data.format);
+
+    // create memory primitive based on dummy data
+    src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData));
+    filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(),
+        DummyData));
+    dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData));
+
+    // create convolution primitive and add it to net
+    if (!convFwdDims.bias_dims.empty()) {
+      bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType<T>(),
+          memory::format::x}, cpu_engine_}, DummyData));
+      conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
+          *filter_mem_, *bias_mem_, *dst_mem_));
+    } else {
+      conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_,
+          *filter_mem_, *dst_mem_));
+    }
+
+    fwd_primitives_.push_back(*conv_fwd_);
+    return;
+  }
+
+  // MKLDNN memory
+  std::shared_ptr<mkldnn::memory> src_mem_;
+  std::shared_ptr<mkldnn::memory> filter_mem_;
+  std::shared_ptr<mkldnn::memory> bias_mem_;
+  std::shared_ptr<mkldnn::memory> dst_mem_;
+
+  std::shared_ptr<mkldnn::stream> fwd_stream_;
+  std::vector<mkldnn::primitive> fwd_primitives_;
+
+  // desc & primitive desc
+  std::shared_ptr<mkldnn::convolution_forward::desc> fwd_desc_;
+
+  // memory desc
+  std::shared_ptr<mkldnn::memory::desc> src_md_;
+  std::shared_ptr<mkldnn::memory::desc> filter_md_;
+  std::shared_ptr<mkldnn::memory::desc> bias_md_;
+  std::shared_ptr<mkldnn::memory::desc> dst_md_;
+
+  engine cpu_engine_ = engine(engine::cpu, 0);
+};
+
+template <typename T>
+class Conv2DFwdFactory : public DnnOpFactory<T> {
+ public:
+  static Conv2DFwd<T>* Get(const ConvFwdDimensions& convFwdDims) {
+    Conv2DFwd<T>* conv2d_fwd = nullptr;
+
+    // try to find a suitable one in pool
+    conv2d_fwd = dynamic_cast<Conv2DFwd<T>*>(
+        Conv2DFwdFactory<T>::GetInstance().GetConv2DFwd(convFwdDims));
+
+    if (conv2d_fwd == nullptr) {
+      conv2d_fwd = new Conv2DFwd<T>(convFwdDims);
+      Conv2DFwdFactory<T>::GetInstance().SetConv2DFwd(
+          convFwdDims, conv2d_fwd);
+    }
+    return conv2d_fwd;
+  }
+
+ private:
+  Conv2DFwdFactory() {}
+  ~Conv2DFwdFactory() {}
+
+  static const int kDilationH = 0, kDilationW = 1;
+
+  static Conv2DFwdFactory& GetInstance() {
+    static Conv2DFwdFactory instance_;
+    return instance_;
+  }
+
+  static std::string CreateKey(const ConvFwdDimensions& convFwdDims) {
+    std::string prefix = "conv2d_fwd_";
+    FactoryKeyCreator key_creator;
+    key_creator.AddAsKey(prefix);
+    key_creator.AddAsKey(convFwdDims.src_dims);
+    key_creator.AddAsKey(convFwdDims.filter_dims);
+    key_creator.AddAsKey(convFwdDims.bias_dims);
+    key_creator.AddAsKey(convFwdDims.dst_dims);
+    key_creator.AddAsKey(convFwdDims.strides);
+    key_creator.AddAsKey(convFwdDims.dilations);
+    key_creator.AddAsKey(convFwdDims.padding_left);
+    key_creator.AddAsKey(convFwdDims.padding_right);
+    return key_creator.GetKey();
+  }
+
+  DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) {
+    std::string key = CreateKey(convFwdDims);
+    return this->GetOp(key);
+  }
+
+  void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp* op) {
+    std::string key = CreateKey(convFwdDims);
+    this->SetOp(key, op);
+  }
+};
+
+#endif
+
 typedef Eigen::ThreadPoolDevice CPUDevice;

-// MKL-DNN is now default. MKL-ML must be specified explicitly.
+// For now, MKL-ML is default. So making MKL-DNN not a default choice.
 #ifdef INTEL_MKL_ML
 template <typename Device, typename T, bool biasEnabled>
 class MklConv2DOp : public OpKernel {
  public:
@@ -528,8 +749,6 @@ class MklConv2DOp : public OpKernel {
   void Compute(OpKernelContext* context) override {
     try {
-      auto cpu_engine = engine(engine::cpu, 0);
-
       // Input tensors
       const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
       const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);
@@ -543,11 +762,11 @@ class MklConv2DOp : public OpKernel {
       MklDnnData<T> src(&cpu_engine);
       MklDnnData<T> filter(&cpu_engine);
-      MklDnnData<T> output(&cpu_engine);
+      MklDnnData<T> dst(&cpu_engine);  // output

-      memory::dims src_dims, filter_dims, padding_l, padding_r,
+      memory::dims src_dims, filter_dims, padding_left, padding_right,
           dilations, strides;
-      memory::dims output_dims_tf_order, output_dims_mkl_order;
+      memory::dims dst_dims_tf_order, dst_dims_mkl_order;

       // Get shapes of input tensors in MKL-DNN order
       MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_,
@@ -555,31 +774,29 @@ class MklConv2DOp : public OpKernel {
       auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
       auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
       conv_utl.GetConvFwdSizesInMklOrder(
-          src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides,
-          &dilations, &output_dims_tf_order, &output_dims_mkl_order,
-          &padding_l, &padding_r);
+          src_tf_shape, filter_tf_shape, &src_dims, &filter_dims,
+          &strides, &dilations, &dst_dims_tf_order, &dst_dims_mkl_order,
+          &padding_left, &padding_right);
       if (!context->status().ok()) return;

       // Check for corner case - if there is nothing to compute, return.
-      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);
+      TensorShape dst_tf_shape = MklDnnDimsToTFShape(dst_dims_tf_order);

       // Corner cases: output with 0 elements and 0 batch size.
-      Tensor* output_tensor = nullptr;
-      if (output_tf_shape.num_elements() == 0 || output_dims_tf_order[0] == 0) {
-        // TODO(jbobba): Verify correctness here
-        // Need semantics for Null MKL tensor
-        MklDnnShape output_mkl_shape;
-        output_mkl_shape.SetMklTensor(false);
-
-        AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
-                                  src_tf_shape, output_mkl_shape);
+      Tensor* dst_tensor = nullptr;
+      if (dst_tf_shape.num_elements() == 0 ||
+          dst_dims_tf_order[0] == 0) {
+        MklDnnShape dst_mkl_shape;
+        dst_mkl_shape.SetMklTensor(false);
+        AllocateOutputSetMklShape(context, kOutputIndex_Dst,
+                                  &dst_tensor, src_tf_shape, dst_mkl_shape);

         // MklConv2D also outputs converted filter as 2nd output of Conv2D.
         filter_mkl_shape.SetMklTensor(false);
         Tensor* output_filter_tensor = nullptr;
         AllocateOutputSetMklShape(context, kOutputIndex_Filter,
-                                  &output_filter_tensor, filter_tf_shape,
-                                  filter_mkl_shape);
+                                  &output_filter_tensor,
+                                  filter_tf_shape, filter_mkl_shape);
         return;
       }
@@ -587,6 +804,7 @@ class MklConv2DOp : public OpKernel {
       // Describe how the inputs and outputs of Convolution look like. Also
       // specify buffers containing actual input and output data.
       auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_);
+
       // If input is in MKL layout, then simply grab input layout; otherwise,
       // construct input Tf layout. For TF layout, although input shape
       // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's

@@ -595,6 +813,7 @@ class MklConv2DOp : public OpKernel {
               ? src_mkl_shape.GetMklLayout()
               : memory::desc(src_dims, MklDnnType<T>(), tf_fmt);
       src.SetUsrMem(src_md, &src_tensor);
+
       // Although filter shape (filter_dims) required is in MKL-DNN order,
       // the layout is Tensorflow's layout (HWIO).
       auto filter_md = filter_mkl_shape.IsMklTensor()  // Should NEVER be true
@@ -603,97 +822,69 @@ class MklConv2DOp : public OpKernel {
                             memory::format::hwio);
       filter.SetUsrMem(filter_md, &filter_tensor);

-      // Set output shape (output_dims) required in MKL-DNN order.
-      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
-      // depending on data format). But later we propagate Mkl layout of the
-      // output to the next op directly.
-      output.SetUsrMem(output_dims_mkl_order, tf_fmt);
-
-      // Create memory descriptors for convolution data w/ no specified format.
-      src.SetOpMemDesc(src_dims, memory::format::any);
-      filter.SetOpMemDesc(filter_dims, memory::format::any);
-      output.SetOpMemDesc(output_dims_mkl_order, memory::format::any);
-
       // MKLDNN dilation starts from 0.
       dilations[kDilationH] -= 1;
       dilations[kDilationW] -= 1;

+      // get a conv2d fwd from primitive pool
+      Conv2DFwd<T>* conv2d_fwd = nullptr;
       if (biasEnabled) {
-        // Create convolution primitive with Bias.
-        MklDnnData<T> bias(&cpu_engine);
-        memory::dims bias_size;
-        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size);
-        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
-        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
-        bias.SetOpMemDesc(bias_size, memory::format::any);
-
-        // Create convolution primitive with Bias.
-        // Use MKLDNN dilated convolution in case of dilated rate (>0).
-        auto conv_desc = (dilations[kDilationH] > 0 ||
-                          dilations[kDilationW] > 0) ?
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), bias.GetOpMemDesc(),
-                output.GetOpMemDesc(), strides, dilations,
-                padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_)):
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), bias.GetOpMemDesc(),
-                output.GetOpMemDesc(), strides,
-                padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_));
-        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
-                                                                  cpu_engine);
-        AllocateOutputTensor(context, conv_prim_desc,
-                             output_dims_mkl_order, tf_fmt, &output_tensor);
-        // Set data handle for output.
-        output.SetUsrMemDataHandle(output_tensor);
-
-        Tensor* filter_out_tensor = nullptr;
-        AllocateFilterOutputTensor(context, conv_prim_desc,
-                                   TFShapeToMklDnnDims(filter_tf_shape),
-                                   &filter_out_tensor);
-
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output,
-                             filter_out_tensor);
+        memory::dims bias_dims = {};
+        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims);
+        ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims,
+            dst_dims_mkl_order, strides, dilations,
+            padding_left, padding_right);
+        conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
       } else {
-        // Create convolution primitive without Bias.
-        // Use MKLDNN dilated convolution in case of dilated rate (>0).
-        auto conv_desc = (dilations[kDilationH] > 0 ||
-                          dilations[kDilationW] > 0) ?
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), output.GetOpMemDesc(),
-                strides, dilations, padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_)):
-            convolution_forward::desc(prop_kind::forward,
-                convolution_direct, src.GetOpMemDesc(),
-                filter.GetOpMemDesc(), output.GetOpMemDesc(),
-                strides, padding_l, padding_r,
-                TFPaddingToMklDnnPadding(padding_));
-        auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc,
-                                                                  cpu_engine);
-        AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order,
-                             tf_fmt, &output_tensor);
-        // Set data handle for output.
-        output.SetUsrMemDataHandle(output_tensor);
-
-        Tensor* filter_out_tensor = nullptr;
-        AllocateFilterOutputTensor(context, conv_prim_desc,
-                                   TFShapeToMklDnnDims(filter_tf_shape),
-                                   &filter_out_tensor);
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
-                             nullptr, &output, filter_out_tensor);
+        ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS,
+            dst_dims_mkl_order, strides, dilations,
+            padding_left, padding_right);
+        conv2d_fwd = Conv2DFwdFactory<T>::Get(convFwdDims);
       }
-    } catch (mkldnn::error& e) {
+
+      // allocate output tensors output_tensor and filter_out_tensor
+      std::shared_ptr<mkldnn::convolution_forward::primitive_desc>
+          conv_fwd_pd = conv2d_fwd->fwd_pd_;
+      AllocateOutputTensor(context, *conv_fwd_pd,
+                           dst_dims_mkl_order, tf_fmt, &dst_tensor);
+      Tensor* filter_out_tensor = nullptr;
+      AllocateFilterOutputTensor(context, *conv_fwd_pd,
+                                 TFShapeToMklDnnDims(filter_tf_shape),
+                                 &filter_out_tensor);
+
+      T* dst_data = static_cast<T*>(dst_tensor->flat<T>().data());
+
+      // check whether src/filter need reorder
+      std::vector<primitive> net;
+      if (src_md.data.format != conv2d_fwd->src_fmt_)
+        src.CheckReorderToOpMem(
+            conv_fwd_pd.get()->src_primitive_desc(), &net);
+      if (filter_md.data.format != conv2d_fwd->filter_fmt_)
+        filter.CheckReorderToOpMem(
+            conv_fwd_pd.get()->weights_primitive_desc(),
+            filter.GetTensorBuffer(filter_out_tensor), &net);
+      stream(stream::kind::eager).submit(net).wait();
+
+      T* src_data = static_cast<T*>(
+          src.GetOpMem().get_data_handle());
+      T* filter_data = static_cast<T*>(
+          filter.GetOpMem().get_data_handle());
+
+      // execute convolution
+      if (biasEnabled) {
+        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
+        T* bias_data = static_cast<T*>(const_cast<T*>(
+            bias_tensor.flat<T>().data()));
+        conv2d_fwd->Execute(src_data, filter_data, bias_data, dst_data);
+      } else {
+        conv2d_fwd->Execute(src_data, filter_data, dst_data);
+      }
+    } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + std::string(e.message) + ", in file " +
-                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+                         ", message: " + std::string(e.message) +
+                         ", in file " + std::string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
@@ -706,6 +897,7 @@ class MklConv2DOp : public OpKernel {
   const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2;
   const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
   const int kDilationH = 0, kDilationW = 1;
+  engine cpu_engine = engine(engine::cpu, 0);

   // Allocate output tensor.
   void AllocateOutputTensor(

View File

@@ -241,6 +241,7 @@ class ScatterNdUpdateOp : public OpKernel {
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU);
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU);
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU);
+TF_CALL_string(REGISTER_SCATTER_ND_CPU);

 // Registers GPU kernels.
 #if GOOGLE_CUDA

View File

@@ -160,6 +160,7 @@ struct ScatterNdFunctor<CPUDevice, T, Index, OP, IXDIM> {
       REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB);

 TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE);
+REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD);
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH)
 #undef REGISTER_SCATTER_ND_MATH

View File

@@ -16,35 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_
 #define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_

-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
-// This file requires the following include because it uses CudaAtomicMax:
-// #include "tensorflow/core/util/cuda_kernel_helper.h"
-// Unfortunately we can't add the #include, since it breaks compilation for
-// non-GPU targets. This only breaks in clang, because it's more strict for
-// template code and CudaAtomicMax is used in template context.
-
 // This file requires the following include because it uses CudaAtomicMax:
 // #include "tensorflow/core/util/cuda_kernel_helper.h"

View File

@@ -23,7 +23,7 @@ limitations under the License.
 #if defined(WIN32)
 #include "extras/CUPTI/include/cupti.h"
 #else
-#include "cuda/extras/CUPTI/include/cupti.h"
+#include "cupti.h"
 #endif

 namespace perftools {
 namespace gputools {

View File

@@ -24,7 +24,7 @@ limitations under the License.
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc0"
+#define TF_VERSION_SUFFIX "-rc1"

 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)

View File

@@ -19,6 +19,8 @@ limitations under the License.
 #include <string>
 #include <vector>
+#include <unordered_map>
+#include <utility>

 #include "mkl_dnn.h"
 #include "mkl_dnn_types.h"
@@ -1759,7 +1761,90 @@ class MklDnnData {
   }
 };

-#endif  // INTEL_MKL_ML
+/// Base class for operations with reuse of DNN primitives
+///
+class DnnOp {
+ public:
+  virtual ~DnnOp() {}
+
+  // Dummy data. Its size, hard-coded as 256 here, does
+  // not matter since MKL should never operate on this buffer.
+  unsigned char DummyData[256];
+};
+
+// This constant is used to declare dummy buffer (size), for MKL primitives
+const mkldnn::memory::dims NONE_DIMS = {};
+
+template <typename T>
+class DnnOpFactory {
+ public:
+  DnnOpFactory() {}
+  ~DnnOpFactory() {}
+
+  DnnOp* GetOp(const std::string& key) {
+    auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
+    if (stream_iter == DnnOpFactory<T>::GetHashMap().end()) {
+      return nullptr;
+    } else {
+      return stream_iter->second;
+    }
+  }
+
+  void SetOp(const std::string& key, DnnOp* op) {
+    auto stream_iter = DnnOpFactory<T>::GetHashMap().find(key);
+    CHECK(stream_iter == DnnOpFactory<T>::GetHashMap().end());
+    DnnOpFactory<T>::GetHashMap()[key] = op;
+  }
+
+ private:
+  static inline std::unordered_map<std::string, DnnOp*>& GetHashMap() {
+    static thread_local std::unordered_map<std::string, DnnOp*> map_;
+    return map_;
+  }
+};
+
+// utility class for creating keys of MKL primitive pool.
+class FactoryKeyCreator {
+ public:
+  FactoryKeyCreator() {
+    key_.reserve(kMaxKeyLength);
+  }
+
+  ~FactoryKeyCreator() {}
+
+  void AddAsKey(const string& str) {
+    auto buffer = reinterpret_cast<const char*>(str.c_str());
+    Append(buffer, str.length());
+  }
+
+  void AddAsKey(const mkldnn::memory::dims& dims) {
+    for (unsigned int i = 0; i < dims.size(); i++) {
+      AddAsKey<int>(dims[i]);
+    }
+  }
+
+  template <typename T>
+  void AddAsKey(const T data) {
+    auto buffer = reinterpret_cast<const char*>(&data);
+    Append(buffer, sizeof(T));
+  }
+
+  std::string GetKey() {
+    return key_;
+  }
+
+ private:
+  string key_;
+  const char delimiter = 'x';
+  const int kMaxKeyLength = 256;
+
+  void Append(const char* data, int len) {
+    key_.append(data, len);
+    key_.append(1, delimiter);
+  }
+};
+
+#endif  // INTEL_MKL_DNN

 }  // namespace tensorflow
 #endif  // INTEL_MKL

View File

@@ -1,5 +1,5 @@
 # Roadmap
-**Last updated: Feb 15, 2018**
+**Last updated: Apr 27, 2018**

 TensorFlow is a rapidly moving, community supported project. This document is intended
 to provide guidance about priorities and focus areas of the core set of TensorFlow

@@ -14,12 +14,12 @@ expected in the next one to two releases.
 ### APIs
 #### High Level APIs:
-* Easy multi-GPU utilization with Estimators
+* Easy multi-GPU and TPU utilization with Estimators
 * Easy-to-use high-level pre-made estimators for Gradient Boosted Trees, Time Series, and other models

 #### Eager Execution:
 * Efficient utilization of multiple GPUs
-* Distributed training (multi-machine)
+* Distributed training support (multi-machine)
 * Performance improvements
 * Simpler export to a GraphDef/SavedModel

@@ -31,14 +31,14 @@ to create Keras models Eager- style via Model subclassing)
 #### Official Models:
 * A set of
-[reference models](https://github.com/tensorflow/models/tree/master/official)
+[models](https://github.com/tensorflow/models/tree/master/official)
 across image recognition, speech, object detection, and
 translation that demonstrate best practices and serve as a starting point for
 high-performance model development.

 #### Contrib:
-* Deprecation notices added to parts of tf.contrib where preferred implementations exist outside of tf.contrib.
-* As much as possible, large projects inside tf.contrib moved to separate repositories.
+* Deprecate parts of tf.contrib where preferred implementations exist outside of tf.contrib.
+* As much as possible, move large projects inside tf.contrib to separate repositories.
 * The tf.contrib module will eventually be discontinued in its current form, experimental development will in future happen in other repositories.

@@ -50,36 +50,72 @@ across image recognition, speech, object detection, and
 ### Platforms
 #### TensorFlow Lite:
-* Increased coverage of supported ops in TensorFlow Lite
+* Increase coverage of supported ops in TensorFlow Lite
 * Easier conversion of a trained TensorFlow graph for use on TensorFlow Lite
 * Support for GPU acceleration in TensorFlow Lite (iOS and Android)
 * Support for hardware accelerators via Android NeuralNets API
-* Improved CPU performance by quantization and other network optimizations (eg. pruning, distillation)
-* Increased support for devices beyond Android and iOS (eg. RPi, Cortex-M)
+* Improve CPU performance by quantization and other network optimizations (eg. pruning, distillation)
+* Increase support for devices beyond Android and iOS (eg. RPi, Cortex-M)

+#### TensorFlow.js:
+* Release package for Node.js bindings to the TensorFlow C API through the TensorFlow.js backend interface
+* Expand support for importing TensorFlow SavedModels and Keras models into browser with unified APIs supporting retraining in browser
+* Improve Layers API and allow model exporting/saving
+* Release tfjs-data API for efficient data input pipelines

+#### TensorFlow with Swift:
+* Establish open source project including documentation, open design, and code availability.
+* Continue implementing and refining implementation and design through 2018.
+* Aim for implementation to be solid enough for general use later in 2018.

 ### Performance
 #### Distributed TensorFlow:
-* Multi-GPU support optimized for a variety of GPU topologies
-* Improved mechanisms for distributing computations on several machines
+* Optimize Multi-GPU support for a variety of GPU topologies
+* Improve mechanisms for distributing computations on several machines

-#### Optimizations:
-* Mixed precision training support with initial example model and guide
-* Native TensorRT support
+#### GPU Optimizations:
+* Simplify mixed precision API with initial example model and guide.
+* Finalize TensorRT API and move to core.
+* CUDA 9.2 and NCCL 2.x default in TensorFlow builds.
+* Optimizations for DGX-2.
+* Remove support for CUDA less than 8.x and cuDNN less than 6.x.

+#### CPU Optimizations
 * Int8 support for SkyLake via MKL
 * Dynamic loading of SIMD-optimized kernels
+* MKL for Linux and Windows

+### End-to-end ML systems:
+#### TensorFlow Hub:
+* Expand support for module-types in TF Hub with TF Eager integration, Keras layers integration, and TensorFlow.js integration
+* Accept variable-sized image input
+* Improve multi-GPU estimator support
+* Document and improve TPU integration

+#### TensorFlow Extended:
+* Open source more of the TensorFlow Extended platform to facilitate adoption of TensorFlow in production settings.
+* Release TFX libraries for Data Validation

+### Documentation and Resources:
+* Update documentation, tutorials and Getting Started guides on all features and APIs
+* Update [Youtube Tensorflow channel](https://youtube.com/tensorflow) weekly with new content:
+  Coding TensorFlow - where we teach folks coding with tensorflow
+  TensorFlow Meets - where we highlight community contributions
+  Ask TensorFlow - where we answer community questions
+  Guest and Showcase videos
+* Update [Official TensorFlow blog](https://blog.tensorflow.org) with regular articles from Google team and the Community

-### Documentation and Usability:
-* Updated documentation, tutorials and Getting Started guides
-* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications

 ### Community and Partner Engagement
 #### Special Interest Groups:
-* Mobilizing the community to work together in focused domains
+* Mobilize the community to work together in focused domains
 * [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute): build and packaging of TensorFlow
-* More to be identified and launched
+* SIG TensorBoard, SIG Rust, and more to be identified and launched

 #### Community:
 * Incorporate public feedback on significant design decisions via a Request-for-Comment (RFC) process
 * Formalize process for external contributions to land in TensorFlow and associated projects
 * Grow global TensorFlow communities and user groups
 * Collaborate with partners to co-develop and publish research papers
+* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications
@@ -38,8 +38,10 @@ Estimators automatically write the following to disk:

uses to create visualizations.

To specify the top-level directory in which the Estimator stores its
information, assign a value to the optional `model_dir` argument of *any*
`Estimator`'s constructor.
Taking `DNNClassifier` as an example,
the following code sets the `model_dir`
argument to the `models/iris` directory:

```python
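# The actual snippet is elided by this diff hunk. A minimal sketch, assuming
# `import tensorflow as tf` and the iris feature columns from that guide
# (`my_feature_columns` is a stand-in name, not the guide's exact text):
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
    model_dir='models/iris')
```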
@@ -138,7 +138,7 @@ The model will represent the buckets as follows:

|< 1960             | [1, 0, 0, 0] |
|>= 1960 but < 1980 | [0, 1, 0, 0] |
|>= 1980 but < 2000 | [0, 0, 1, 0] |
|>= 2000            | [0, 0, 0, 1] |
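In the `tf.feature_column` API this bucketing can be expressed directly. A
minimal sketch (assuming the raw feature is a numeric column named `year`;
the name is illustrative, not taken from the guide):

```python
import tensorflow as tf

# Raw numeric input holding the year value.
year = tf.feature_column.numeric_column('year')

# Three boundaries yield the four one-hot buckets shown in the table above.
bucketized_year = tf.feature_column.bucketized_column(
    source_column=year, boundaries=[1960, 1980, 2000])
```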
Why would you want to split a number—a perfectly valid input to your
model—into a categorical value? Well, notice that the categorization splits a
@@ -10,7 +10,7 @@ course prior to diving into TensorFlow documentation:

TensorFlow is a tool for machine learning. While it contains a wide range of
functionality, TensorFlow is mainly designed for deep neural network models.

The easiest way to get started with TensorFlow is using Eager Execution.

* @{$get_started/eager} is for anyone new to machine learning or TensorFlow.
@@ -38,7 +38,7 @@ enable TensorFlow for C:

OS="linux" # Change to "darwin" for macOS
TARGET_DIRECTORY="/usr/local"
curl -L \
  "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz

The `tar` command extracts the TensorFlow C library into the `lib`
@@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go:

TF_TYPE="cpu" # Change to "gpu" for GPU support
TARGET_DIRECTORY='/usr/local'
curl -L \
  "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.8.0-rc1.tar.gz" |
sudo tar -C $TARGET_DIRECTORY -xz

The `tar` command extracts the TensorFlow C library into the `lib`
@@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs:

<dependency>
  <groupId>org.tensorflow</groupId>
  <artifactId>tensorflow</artifactId>
  <version>1.8.0-rc1</version>
</dependency>

@@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow:

  <dependency>
    <groupId>org.tensorflow</groupId>
    <artifactId>tensorflow</artifactId>
    <version>1.8.0-rc1</version>
  </dependency>
</dependencies>
</project>

@@ -124,12 +124,12 @@ instead:

<dependency>
  <groupId>org.tensorflow</groupId>
  <artifactId>libtensorflow</artifactId>
  <version>1.8.0-rc1</version>
</dependency>
<dependency>
  <groupId>org.tensorflow</groupId>
  <artifactId>libtensorflow_jni_gpu</artifactId>
  <version>1.8.0-rc1</version>
</dependency>

@@ -148,7 +148,7 @@ refer to the simpler instructions above instead.

Take the following steps to install TensorFlow for Java on Linux or macOS:

1. Download
   [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
   which is the TensorFlow Java Archive (JAR).

2. Decide whether you will run TensorFlow for Java on CPU(s) only or with

@@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:

OS=$(uname -s | tr '[:upper:]' '[:lower:]')
mkdir -p ./jni
curl -L \
  "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.8.0-rc1.tar.gz" |
tar -xz -C ./jni

### Install on Windows

@@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS:

Take the following steps to install TensorFlow for Java on Windows:

1. Download
   [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.8.0-rc1.jar),
   which is the TensorFlow Java Archive (JAR).

2. Download the following Java Native Interface (JNI) file appropriate for
   [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.8.0-rc1.zip).

3. Extract this .zip file.

@@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the

downloaded `.jar` in your `classpath` by using the `-cp` compilation flag
as follows:

<pre><b>javac -cp libtensorflow-1.8.0-rc1.jar HelloTF.java</b></pre>

### Running

@@ -241,11 +241,11 @@ two files are available to the JVM:

For example, the following command line executes the `HelloTF` program on Linux
and macOS:

<pre><b>java -cp libtensorflow-1.8.0-rc1.jar:. -Djava.library.path=./jni HelloTF</b></pre>

And the following command line executes the `HelloTF` program on Windows:

<pre><b>java -cp libtensorflow-1.8.0-rc1.jar;. -Djava.library.path=jni HelloTF</b></pre>

If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API. If the program
@@ -1,139 +1,266 @@

# Installing TensorFlow on Ubuntu

This guide explains how to install TensorFlow on Ubuntu Linux. While these
instructions may work on other Linux variants, they are tested and supported with
the following system requirements:

* 64-bit desktops or laptops
* Ubuntu 16.04 or higher

## Choose which TensorFlow to install

The following TensorFlow variants are available for installation:

* __TensorFlow with CPU support only__. If your system does not have an
NVIDIA®&nbsp;GPU, you must install this version. This version of TensorFlow is
usually easier to install, so even if you have an NVIDIA GPU, we recommend
installing this version first.
* __TensorFlow with GPU support__. TensorFlow programs usually run much faster on
a GPU than on a CPU. If you run performance-critical applications and your
system has an NVIDIA®&nbsp;GPU that meets the prerequisites, you should install
this version. See [TensorFlow GPU support](#NVIDIARequirements) for details.
<a name="NVIDIARequirements"></a>
### NVIDIA requirements to run TensorFlow with GPU support
If you are installing TensorFlow with GPU support using one of the
mechanisms described in this guide, then the following NVIDIA software
must be installed on your system:
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
Ensure that you append the relevant CUDA pathnames to the
`LD_LIBRARY_PATH` environment variable as described in the
NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
Ensure that you create the `CUDA_HOME` environment variable as
described in the NVIDIA documentation.
* GPU card with CUDA Compute Capability 3.0 or higher for building
from source and 3.5 or higher for our binaries. See
[NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for
a list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) supporting your version of the CUDA
Toolkit.
* The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface.
This library provides advanced profiling support. To install this library,
issue the following command for CUDA Toolkit >= 8.0:
<pre>
$ <b>sudo apt-get install cuda-command-line-tools</b>
</pre>
and add its path to your `LD_LIBRARY_PATH` environment variable:
<pre>
$ <b>export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</b>
</pre>
For CUDA Toolkit <= 7.5 do:
<pre>
$ <b>sudo apt-get install libcupti-dev</b>
</pre>
* **[OPTIONAL]** For optimized inferencing performance, you can also install
**NVIDIA TensorRT 3.0**. The minimal set of TensorRT runtime components needed
for use with the pre-built `tensorflow-gpu` package can be installed as follows:
<pre>
$ <b>wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
$ <b>sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</b>
$ <b>sudo apt-get update</b>
$ <b>sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</b>
</pre>
**IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu`
package, please use the Ubuntu **14.04** package of TensorRT as shown above,
even when installing onto an Ubuntu 16.04 system.<br/>
<br/>
To build the TensorFlow-TensorRT integration module from source rather than
using pre-built binaries, see the [module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
For detailed TensorRT installation instructions, see [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).<br/>
<br/>
To avoid cuDNN version conflicts during later system upgrades, you can hold
the cuDNN version at 7.0.5:
<pre>
$ <b> sudo apt-mark hold libcudnn7 libcudnn7-dev</b>
</pre>
To later allow upgrades, you can remove the hold:
<pre>
$ <b> sudo apt-mark unhold libcudnn7 libcudnn7-dev</b>
</pre>
If you have an earlier version of the preceding packages, please upgrade to
the specified versions. If upgrading is not possible, then you may still run
TensorFlow with GPU support, if you @{$install_sources$install TensorFlow from Sources}.
## How to install TensorFlow

There are a few options to install TensorFlow on your machine:

* [Use pip in a virtual environment](#InstallingVirtualenv) *(recommended)*
* [Use pip in your system environment](#InstallingNativePip)
* [Configure a Docker container](#InstallingDocker)
* [Use pip in Anaconda](#InstallingAnaconda)
* [Install TensorFlow from source](/install/install_sources)
<a name="InstallingVirtualenv"></a>
### Use `pip` in a virtual environment

Key Point: Using a virtual environment is the recommended install method.

The [Virtualenv](https://virtualenv.pypa.io/en/stable/) tool creates virtual
Python environments that are isolated from other Python development on the same
machine. In this scenario, you install TensorFlow and its dependencies within a
virtual environment that is available when *activated*. Virtualenv provides a
reliable way to install and run TensorFlow while avoiding conflicts with the rest
of the system.
##### 1. Install Python, `pip`, and `virtualenv`.
On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V # or: python3 -V</code>
<code class="devsite-terminal">pip -V # or: pip3 -V</code>
</pre>
To install these packages on Ubuntu:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev python-virtualenv # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev python-virtualenv # for Python 3.n</code>
</pre>
We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>
If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>
##### 2. Create a directory for the virtual environment and choose a Python interpreter.
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">mkdir ~/tensorflow # somewhere to work out of</code>
<code class="devsite-terminal">cd ~/tensorflow</code>
<code># Choose one of the following Python environments for the ./venv directory:</code>
<code class="devsite-terminal">virtualenv --system-site-packages <var>venv</var> # Use python default (Python 2.7)</code>
<code class="devsite-terminal">virtualenv --system-site-packages -p python3 <var>venv</var> # Use Python 3.n</code>
</pre>
##### 3. Activate the Virtualenv environment.
Use one of these shell-specific commands to activate the virtual environment:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate # bash, sh, ksh, or zsh</code>
<code class="devsite-terminal">source ~/tensorflow/<var>venv</var>/bin/activate.csh # csh or tcsh</code>
<code class="devsite-terminal">. ~/tensorflow/<var>venv</var>/bin/activate.fish # fish</code>
</pre>
When the Virtualenv is activated, the shell prompt displays as `(venv) $`.
##### 4. Upgrade `pip` in the virtual environment.
Within the active virtual environment, upgrade `pip`:
<pre class="prettyprint lang-bsh">
(venv)$ pip install -U pip
</pre>
You can install other Python packages within the virtual environment without
affecting packages outside the `virtualenv`.
##### 5. Install TensorFlow in the virtual environment.
Choose one of the available TensorFlow packages for installation:
* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support
Within an active Virtualenv environment, use `pip` to install the package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">pip install -U tensorflow</code>
</pre>
Use `pip list` to show the packages installed in the virtual environment.
[Validate the install](#ValidateYourInstallation) and test the version:
<pre class="prettyprint lang-bsh">
(venv)$ python -c "import tensorflow as tf; print(tf.__version__)"
</pre>
Success: TensorFlow is now installed.
Use the `deactivate` command to stop the Python virtual environment.
#### Problems
If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:
<pre class="prettyprint lang-bsh">
(venv)$ pip install --upgrade <var>remote-pkg-URL</var> # Python 2.7
(venv)$ pip3 install --upgrade <var>remote-pkg-URL</var> # Python 3.n
</pre>
The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.
See [Common Installation Problems](#common_installation_problems) if you
encounter problems.
#### Uninstall TensorFlow
To uninstall TensorFlow, remove the Virtualenv directory you created in step 2:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">deactivate # stop the virtualenv</code>
<code class="devsite-terminal">rm -r ~/tensorflow/<var>venv</var></code>
</pre>
<a name="InstallingNativePip"></a>
### Use `pip` in your system environment
Use `pip` to install the TensorFlow package directly on your system without
using a container or virtual environment for isolation. This method is
recommended for system administrators who want a TensorFlow installation that is
available to everyone on a multi-user system.
Since a system install is not isolated, it could interfere with other
Python-based installations. But if you understand `pip` and your Python
environment, a system `pip` install is straightforward.
See the
[REQUIRED_PACKAGES section of setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py)
for a list of packages that TensorFlow installs.
##### 1. Install Python and `pip`.
On Ubuntu, Python is automatically installed and `pip` is *usually* installed.
Confirm the `python` and `pip` versions:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -V # or: python3 -V</code>
<code class="devsite-terminal">pip -V # or: pip3 -V</code>
</pre>
To install these packages on Ubuntu:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install python-pip python-dev # for Python 2.7</code>
<code class="devsite-terminal">sudo apt-get install python3-pip python3-dev # for Python 3.n</code>
</pre>
We *recommend* using `pip` version 8.1 or higher. If using a release before
version 8.1, upgrade `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U pip</code>
</pre>
If not using Ubuntu and [setuptools](https://pypi.org/project/setuptools/) is
installed, use `easy_install` to install `pip`:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">easy_install -U pip</code>
</pre>
##### 2. Install TensorFlow on system.
Choose one of the available TensorFlow packages for installation:
* `tensorflow` —Current release for CPU
* `tensorflow-gpu` —Current release with GPU support
* `tf-nightly` —Nightly build for CPU
* `tf-nightly-gpu` —Nightly build with GPU support
Use `pip` to install the package for Python 2 or 3:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install -U tensorflow # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install -U tensorflow # Python 3.n</code>
</pre>
Use `pip list` to show the packages installed on the system.
[Validate the install](#ValidateYourInstallation) and test the version:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">python -c "import tensorflow as tf; print(tf.__version__)"</code>
</pre>
Success: TensorFlow is now installed.
#### Problems
If the above steps failed, try installing the TensorFlow binary using the remote
URL of the `pip` package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip install --upgrade <var>remote-pkg-URL</var> # Python 2.7</code>
<code class="devsite-terminal">sudo pip3 install --upgrade <var>remote-pkg-URL</var> # Python 3.n</code>
</pre>
The <var>remote-pkg-URL</var> depends on the operating system, Python version,
and GPU support. See [here](#the_url_of_the_tensorflow_python_package) for the
URL naming scheme and location.
See [Common Installation Problems](#common_installation_problems) if you
encounter problems.
#### Uninstall TensorFlow
To uninstall TensorFlow on your system, use one of the following commands:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo pip uninstall tensorflow # for Python 2.7</code>
<code class="devsite-terminal">sudo pip3 uninstall tensorflow # for Python 3.n</code>
</pre>
<a name="InstallingDocker"></a>
### Configure a Docker container
Docker completely isolates the TensorFlow installation
from pre-existing packages on your machine. The Docker container contains

@@ -142,210 +269,6 @@ large (hundreds of MBs). You might choose the Docker installation if you are

incorporating TensorFlow into a larger application architecture that already
uses Docker.
Take the following steps to install TensorFlow through Docker:

1. Install Docker on your machine as described in the

@@ -364,7 +287,7 @@ Take the following steps to install TensorFlow through Docker:

The remainder of this section explains how to launch a Docker container.

#### CPU-only

To launch a Docker container with CPU-only support (that is, without
GPU support), enter a command of the following format:

@@ -414,7 +337,7 @@ $ <b>docker run -it -p 8888:8888 tensorflow/tensorflow</b>

Docker will download the TensorFlow binary image the first time you launch it.

#### GPU support

Prior to installing TensorFlow with GPU support, ensure that your system meets all
[NVIDIA software requirements](#NVIDIARequirements). To launch a Docker container

@@ -470,14 +393,22 @@ For more details see the

[TensorFlow docker readme](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/docker).

#### Next Steps

You should now
[validate your installation](#ValidateYourInstallation).
<a name="InstallingAnaconda"></a> <a name="InstallingAnaconda"></a>
## Installing with Anaconda ### Use `pip` in Anaconda
Anaconda provides the `conda` utility to create a virtual environment. However,
within Anaconda, we recommend installing TensorFlow using the `pip install`
command and *not* with the `conda install` command.
Caution: `conda` is a community supported package this is not officially
maintained by the TensorFlow team. Use this package at your own risk since it is
not tested on new TensorFlow releases.
Take the following steps to install TensorFlow in an Anaconda environment:

@@ -507,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:

<pre>
(tensorflow)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl</b></pre>
<a name="ValidateYourInstallation"></a> <a name="ValidateYourInstallation"></a>
## Validate your installation ## Validate your installation
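The validation steps themselves are elided by this diff hunk. The usual check
is a short program run from a Python shell, along these lines (a sketch of the
classic snippet, not this guide's exact text):

```python
# Run python from a shell where TensorFlow is installed.
import tensorflow as tf

hello = tf.constant('Hello, TensorFlow!')
sess = tf.Session()
print(sess.run(hello))  # Prints a greeting if the install works.
```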
@@ -563,11 +494,89 @@ installation problems](#common_installation_problems).

If you are new to machine learning, we recommend the following:

* [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course)
* @{$get_started/eager}

If you are experienced with machine learning but new to TensorFlow, see
@{$get_started/eager}.
<a name="NVIDIARequirements"></a>
## TensorFlow GPU support
To install TensorFlow with GPU support, configure the following NVIDIA® software
on your system:
* [CUDA Toolkit 9.0](http://nvidia.com/cuda). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
Append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment
variable as described in the NVIDIA documentation.
* [cuDNN SDK v7](http://developer.nvidia.com/cudnn). For details, see
[NVIDIA's documentation](http://docs.nvidia.com/deeplearning/sdk/cudnn-install/).
Create the `CUDA_HOME` environment variable as described in the NVIDIA
documentation.
* A GPU card with CUDA Compute Capability 3.0 or higher for building TensorFlow
from source. To use the TensorFlow binaries, version 3.5 or higher is required.
See the [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a
list of supported GPU cards.
* [GPU drivers](http://nvidia.com/driver) that support your version of the CUDA
Toolkit.
* The `libcupti-dev` library is the NVIDIA CUDA Profile Tools Interface. This
library provides advanced profiling support. To install this library,
use the following command for CUDA Toolkit >= 8.0:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install cuda-command-line-tools</code>
</pre>
Add this path to the `LD_LIBRARY_PATH` environment variable:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64</code>
</pre>
For CUDA Toolkit <= 7.5 use:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-get install libcupti-dev</code>
</pre>
* *OPTIONAL*: For optimized performance during inference, install
*NVIDIA&nbsp;TensorRT&nbsp;3.0*. To install the minimal amount of TensorRT
runtime components required for use with the pre-built `tensorflow-gpu` package:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
<code class="devsite-terminal">sudo dpkg -i nvinfer-runtime-trt-repo-ubuntu1404-3.0.4-ga-cuda9.0_1.0-1_amd64.deb</code>
<code class="devsite-terminal">sudo apt-get update</code>
<code class="devsite-terminal">sudo apt-get install -y --allow-downgrades libnvinfer-dev libcudnn7-dev=7.0.5.15-1+cuda9.0 libcudnn7=7.0.5.15-1+cuda9.0</code>
</pre>
Note: For compatibility with the pre-built `tensorflow-gpu` package, use the
Ubuntu *14.04* package of TensorRT (shown above). Use this even when installing
on an Ubuntu 16.04 system.
To build the TensorFlow-TensorRT integration module from source instead of using
the pre-built binaries, see the
[module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#using-tensorrt-in-tensorflow).
For detailed TensorRT installation instructions, see
[NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html).
To avoid cuDNN version conflicts during later system upgrades, hold the cuDNN
version at 7.0.5:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-mark hold libcudnn7 libcudnn7-dev</code>
</pre>
To allow upgrades, remove the hold:
<pre class="prettyprint lang-bsh">
<code class="devsite-terminal">sudo apt-mark unhold libcudnn7 libcudnn7-dev</code>
</pre>
If you have an earlier version of the preceding packages, upgrade to the
specified versions. If upgrading is not possible, you can still run TensorFlow
with GPU support by @{$install_sources}.
## Common installation problems

@@ -581,7 +590,7 @@ ask a new question about it on Stack Overflow and specify

the `tensorflow` tag.

<table>
<tr> <th>Link to GitHub or Stack&nbsp;Overflow</th> <th>Error Message</th> </tr>
<tr>
<td><a href="https://stackoverflow.com/q/36159194">36159194</a></td>
@@ -681,14 +690,14 @@ This section documents the relevant values for Linux installations.

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp27-none-linux_x86_64.whl
</pre>

Note that GPU support requires the NVIDIA hardware and software described in

@@ -700,14 +709,14 @@ Note that GPU support requires the NVIDIA hardware and software described in

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp34-cp34m-linux_x86_64.whl
</pre>

Note that GPU support requires the NVIDIA hardware and software described in

@@ -719,14 +728,14 @@ Note that GPU support requires the NVIDIA hardware and software described in

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp35-cp35m-linux_x86_64.whl
</pre>

@@ -738,14 +747,14 @@ Note that GPU support requires the NVIDIA hardware and software described in

CPU only:

<pre>
https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>

GPU support:

<pre>
https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.8.0rc1-cp36-cp36m-linux_x86_64.whl
</pre>
@@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv:

TensorFlow in the active Virtualenv is as follows:

<pre> $ <b>pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b></pre>

If you encounter installation problems, see
[Common Installation Problems](#common-installation-problems).
@@ -242,7 +242,7 @@ take the following steps:

issue the following command:

<pre> $ <b>sudo pip3 install --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl</b> </pre>

If the preceding command fails, see
[installation problems](#common-installation-problems).
@@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:

TensorFlow for Python 2.7:

<pre> (<i>targetDirectory</i>)$ <b>pip install --ignore-installed --upgrade \
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl</b></pre>

<a name="ValidateYourInstallation"></a>

@@ -524,7 +524,7 @@ The value you specify depends on your Python version.

<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py2-none-any.whl
</pre>

@@ -532,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc0-py2-none-a

<pre>
https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.8.0rc1-py3-none-any.whl
</pre>
@@ -354,10 +354,10 @@ Invoke `pip install` to install that pip package.

The filename of the `.whl` file depends on your platform.
For example, the following command will install the pip package

for TensorFlow 1.8.0rc1 on Linux:

<pre>
$ <b>sudo pip install /tmp/tensorflow_pkg/tensorflow-1.8.0rc1-py2-none-any.whl</b>
</pre>

## Validate your installation
@@ -86,7 +86,7 @@ code. `tf_library` utilizes `tfcompile` to compile the TensorFlow graph into

executable code.

```build
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")

# Use the tf_library macro to compile your graph into executable code.
tf_library(

@@ -258,8 +258,8 @@ file.

```build
# Example of linking your binary
# Also see //tensorflow/compiler/aot/tests/BUILD
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")

# The same tf_library call from step 2 above.
tf_library(
@@ -32,9 +32,9 @@ IRIS_TEST = os.path.join(os.path.dirname(__file__), "iris_test.csv")

def main(unused_argv):
  # Load datasets.
  training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
  test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
      filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

  validation_metrics = {
      "accuracy":

@@ -83,7 +83,7 @@ def main(unused_argv):

  # Classify two new flower samples.
  new_samples = np.array(
      [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
  y = list(classifier.predict(new_samples))
  print("Predictions: {}".format(str(y)))
@@ -5,7 +5,7 @@ Construct and execute TensorFlow graphs in Go.

[![GoDoc](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go?status.svg)](https://godoc.org/github.com/tensorflow/tensorflow/tensorflow/go)

> *WARNING*: The API defined in this package is not stable and can change
> without notice. The same goes for the package path:
> (`github.com/tensorflow/tensorflow/tensorflow/go`).

## Quickstart
@@ -21386,7 +21386,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr {

// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc.
//
// The `bad_color` argument is the color to use in the generated images for
// non-finite input values. It is a `uint8` 1-D tensor of length `channels`.
// Each element must be in the range `[0, 255]` (It represents the value of a
// pixel in the output image). Non-finite values in the input tensor are
// replaced by this tensor in the output image. The default value is the color
@@ -644,11 +644,9 @@ class Estimator(object):

          sharded=True)
      saver_for_restore.restore(session, checkpoint_path)

      local_init_op = (
          estimator_spec.scaffold.local_init_op or
          monitored_session.Scaffold.default_local_init_op())

      # Perform the export
      builder = saved_model_builder.SavedModelBuilder(temp_export_dir)
@@ -29,12 +29,14 @@ from tensorflow.python.estimator import run_config as run_config_lib

from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
from tensorflow.python.framework import tensor_util
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import models
from tensorflow.python.keras._impl.keras import optimizers
from tensorflow.python.keras._impl.keras.engine.base_layer import Layer
from tensorflow.python.keras._impl.keras.engine.network import Network
from tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import metrics as metrics_module
from tensorflow.python.ops import variables as variables_module
@@ -55,6 +57,17 @@ def _cast_tensor_to_floatx(x):

  return math_ops.cast(x, K.floatx())


def _convert_tensor(x):
  """Create or cast tensor if needed."""
  if not tensor_util.is_tensor(x):
    # x is a numpy array
    x = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(x)
  if check_ops.is_numeric_tensor(x):
    # is_numeric_tensor returns False if provided with a numpy array
    x = _cast_tensor_to_floatx(x)
  return x
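# Example (hypothetical, not part of this diff): a numpy array is first
# converted to a Tensor and, when numeric, cast to K.floatx(); a non-numeric
# Tensor passes through unchanged.
#   _convert_tensor(np.array([1, 2]))           # -> float32 Tensor
#   _convert_tensor(constant_op.constant('a'))  # -> string Tensor, unchanged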
def _any_variable_initalized():
  """Check if any variable has been initialized in the Keras model.

@@ -86,7 +99,7 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):

  if isinstance(estimator_io, (list, tuple)):
    # Case currently not supported by most built-in input_fn,
    # but it's good to have for sanity
    return [_convert_tensor(x) for x in estimator_io]
  elif isinstance(estimator_io, dict):
    if is_input:
      if keras_model._is_graph_network:
@@ -108,12 +121,12 @@ def _create_ordered_io(keras_model, estimator_io, is_input=True):

                         'It needs to match one '
                         'of the following: %s' % ('input' if is_input else 'output', key,
                                                   ', '.join(keras_io_names)))
    tensors = [_convert_tensor(estimator_io[io_name])
               for io_name in keras_io_names]
    return tensors
  else:
    # Plain array.
    return _convert_tensor(estimator_io)


def _in_place_subclassed_model_reset(model):
@@ -274,8 +287,7 @@ def _clone_and_build_model(mode,

          is_input=False)
    else:
      target_tensors = [
          _convert_tensor(labels)
      ]

  if keras_model._is_graph_network:
@@ -30,6 +30,7 @@ from tensorflow.python.estimator.inputs import numpy_io

from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.keras._impl import keras
from tensorflow.python.keras._impl.keras import backend as K
from tensorflow.python.keras._impl.keras import testing_utils
from tensorflow.python.keras._impl.keras.applications import mobilenet
from tensorflow.python.keras._impl.keras.optimizers import SGD
@@ -142,16 +143,20 @@ def randomize_io_type(array, name):

def multi_inputs_multi_outputs_model():
  # test multi-input layer
  a = keras.layers.Input(shape=(16,), name='input_a')
  b = keras.layers.Input(shape=(16,), name='input_b')
  m = keras.layers.Input(shape=(8,), dtype='bool', name='input_m')
  dense = keras.layers.Dense(8, name='dense_1')

  a_2 = dense(a)
  # Apply a mask
  s_2 = keras.layers.Lambda(lambda k:
                            K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a_2])
  b_2 = dense(b)
  merged = keras.layers.concatenate([s_2, b_2], name='merge')
  c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
  d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
  model = keras.models.Model(inputs=[a, b, m], outputs=[c, d])
  model.compile(
      loss='categorical_crossentropy',
      optimizer='rmsprop',
@ -352,18 +357,27 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
test_samples=50, test_samples=50,
input_shape=(16,), input_shape=(16,),
num_classes=2) num_classes=2)
np.random.seed(_RANDOM_SEED)
(input_m_train, _), (input_m_test, _) = testing_utils.get_test_data(
train_samples=_TRAIN_SIZE,
test_samples=50,
input_shape=(8,),
num_classes=2)
c_train = keras.utils.to_categorical(c_train) c_train = keras.utils.to_categorical(c_train)
c_test = keras.utils.to_categorical(c_test) c_test = keras.utils.to_categorical(c_test)
d_train = keras.utils.to_categorical(d_train) d_train = keras.utils.to_categorical(d_train)
d_test = keras.utils.to_categorical(d_test) d_test = keras.utils.to_categorical(d_test)
def train_input_fn(): def train_input_fn():
input_dict = {'input_a': a_train, 'input_b': b_train} input_dict = {'input_a': a_train, 'input_b': b_train,
'input_m': input_m_train > 0}
output_dict = {'dense_2': c_train, 'dense_3': d_train} output_dict = {'dense_2': c_train, 'dense_3': d_train}
return input_dict, output_dict return input_dict, output_dict
def eval_input_fn(): def eval_input_fn():
input_dict = {'input_a': a_test, 'input_b': b_test} input_dict = {'input_a': a_test, 'input_b': b_test,
'input_m': input_m_test > 0}
output_dict = {'dense_2': c_test, 'dense_3': d_test} output_dict = {'dense_2': c_test, 'dense_3': d_test}
return input_dict, output_dict return input_dict, output_dict
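To see what the new `input_m` input does, here is a hedged, self-contained sketch of the masking pattern the test model uses: `K.switch` keeps the dense features where the boolean mask is True and zeroes them elsewhere. The layer names and shapes below are mine, chosen for brevity.

import numpy as np
from tensorflow.python.keras._impl import keras
from tensorflow.python.keras._impl.keras import backend as K

a = keras.layers.Input(shape=(4,), name='features')
m = keras.layers.Input(shape=(4,), dtype='bool', name='mask')
# Elementwise select: feature where mask is True, zero where it is False.
masked = keras.layers.Lambda(
    lambda k: K.switch(k[0], k[1], K.zeros_like(k[1])))([m, a])
model = keras.models.Model(inputs=[a, m], outputs=masked)

x = np.ones((1, 4), dtype='float32')
keep = np.array([[True, False, True, False]])
print(model.predict([x, keep]))  # -> [[1., 0., 1., 0.]]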
@@ -35,8 +35,7 @@ class DivisionTestCase(test.TestCase):
     """Test all the different ways to divide."""
     values = [1, 2, 7, 11]
     functions = (lambda x: x), constant_op.constant
-    # TODO(irving): Test int8, int16 once we support casts for those.
-    dtypes = np.int32, np.int64, np.float32, np.float64
+    dtypes = np.int8, np.int16, np.int32, np.int64, np.float32, np.float64
     tensors = []
     checks = []
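The TODO could be deleted because true division now has casts for the small integer types. Roughly, and this is my example rather than the test's (`tf.truediv` documents casting int8/int16 operands to float32 before dividing):

import tensorflow as tf

x = tf.constant(7, dtype=tf.int8)
y = tf.constant(2, dtype=tf.int8)
with tf.Session() as sess:
  # Python 3 division on tensors is tf.truediv; int8 inputs are
  # cast to float32, so the result is 3.5 rather than 3.
  print(sess.run(x / y))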
@ -160,7 +160,7 @@ class ReduceJoinTest(UnicodeTestCase):
separator=separator) separator=separator)
if not reduction_indices: if not reduction_indices:
truth = constant_op.constant(truth) truth = constant_op.constant(truth)
truth_squeezed = array_ops.squeeze(truth, squeeze_dims=reduction_indices) truth_squeezed = array_ops.squeeze(truth, axis=reduction_indices)
output_array = output.eval() output_array = output.eval()
output_keep_dims_array = output_keep_dims.eval() output_keep_dims_array = output_keep_dims.eval()
truth_array = truth.eval() truth_array = truth.eval()
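This is part of a sweep replacing the deprecated `squeeze_dims` keyword with `axis` throughout the tree (the same rename appears in several files below). The two spellings behave identically; `squeeze_dims` remained as a deprecated alias at the time. For instance:

import tensorflow as tf

t = tf.zeros([1, 3, 1])
s_new = tf.squeeze(t, axis=[0, 2])          # preferred spelling
s_old = tf.squeeze(t, squeeze_dims=[0, 2])  # deprecated alias, same result
print(s_new.shape, s_old.shape)             # (3,) (3,)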
@@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase):
 class CountNonzeroReductionTest(test.TestCase):

-  def _compare(self, x, reduction_axes, keepdims, use_gpu=False,
+  def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
                feed_dict=None):
-    np_ans = (x != 0).astype(np.int32)
+    np_ans = (x != zero).astype(np.int32)
     if reduction_axes is None:
       np_ans = np.sum(np_ans, keepdims=keepdims)
     else:
@@ -958,6 +958,37 @@ class CountNonzeroReductionTest(test.TestCase):
     y = math_ops.count_nonzero(x, [0])
     self.assertAllEqual(y.eval(), np.zeros(9938))

+  def testStringReduce(self):
+    # Test case for GitHub issue 18712
+    with self.test_session() as sess:
+      v = math_ops.count_nonzero(constant_op.constant(["test"]))
+      self.assertAllClose(sess.run(v), 1)
+
+  def testStringReduce1D(self):
+    # Create a 1D array of strings
+    x = np.asarray(["", "", "a", "", "", "b"])
+    self._compare(x, None, keepdims=False, zero=np.str(""))
+    self._compare(x, [], keepdims=False, zero=np.str(""))
+    self._compare(x, [0], keepdims=False, zero=np.str(""))
+    self._compare(x, None, keepdims=True, zero=np.str(""))
+    self._compare(x, [], keepdims=True, zero=np.str(""))
+    self._compare(x, [0], keepdims=True, zero=np.str(""))
+
+  def testStringReduce2D(self):
+    # Create a 2D array of strings
+    x = np.asarray([["", "", "a", "", "", "b"],
+                    ["", "c", "", "d", "", ""],
+                    ["e", "", "f", "", "", ""]])
+    self._compare(x, None, keepdims=False, zero=np.str(""))
+    self._compare(x, [], keepdims=False, zero=np.str(""))
+    self._compare(x, [0], keepdims=False, zero=np.str(""))
+    self._compare(x, [1], keepdims=False, zero=np.str(""))
+    self._compare(x, [0, 1], keepdims=False, zero=np.str(""))
+    self._compare(x, None, keepdims=True, zero=np.str(""))
+    self._compare(x, [], keepdims=True, zero=np.str(""))
+    self._compare(x, [0], keepdims=True, zero=np.str(""))
+    self._compare(x, [0, 1], keepdims=True, zero=np.str(""))
+
 if __name__ == "__main__":
   test.main()
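The new tests pin down the string semantics that this change enables: for `DT_STRING` inputs, `tf.count_nonzero` treats the empty string as zero and any non-empty string as nonzero. A minimal sketch of the behavior the tests exercise:

import tensorflow as tf

x = tf.constant([["", "a", ""], ["b", "", "c"]])
with tf.Session() as sess:
  print(sess.run(tf.count_nonzero(x)))          # 3 non-empty strings in total
  print(sess.run(tf.count_nonzero(x, axis=1)))  # per-row counts: [1 2]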
@@ -364,6 +364,42 @@ class ScatterNdTest(test.TestCase):
     del input_  # input_ is not used in scatter_nd
     return array_ops.scatter_nd(indices, updates, shape)

+  def testString(self):
+    indices = constant_op.constant([[4], [3], [1], [7]],
+                                   dtype=dtypes.int32)
+    updates = constant_op.constant(["four", "three", "one", "seven"],
+                                   dtype=dtypes.string)
+    expected = np.array([b"", b"one", b"", b"three", b"four",
+                         b"", b"", b"seven"])
+    scatter = self.scatter_nd(indices, updates, shape=(8,))
+    with self.test_session() as sess:
+      result = sess.run(scatter)
+      self.assertAllEqual(expected, result)
+
+    # Same index is updated twice by same value.
+    indices = constant_op.constant([[4], [3], [3], [7]],
+                                   dtype=dtypes.int32)
+    updates = constant_op.constant(["a", "b", "b", "c"],
+                                   dtype=dtypes.string)
+    expected = np.array([b"", b"", b"", b"bb", b"a", b"", b"", b"c"])
+    scatter = self.scatter_nd(indices, updates, shape=(8,))
+    with self.test_session() as sess:
+      result = sess.run(scatter)
+      self.assertAllEqual(expected, result)
+
+    # Same index is updated twice by different value.
+    indices = constant_op.constant([[4], [3], [3], [7]],
+                                   dtype=dtypes.int32)
+    updates = constant_op.constant(["a", "b", "c", "d"],
+                                   dtype=dtypes.string)
+    expected = [np.array([b"", b"", b"", b"bc", b"a", b"", b"", b"d"]),
+                np.array([b"", b"", b"", b"cb", b"a", b"", b"", b"d"])]
+    scatter = self.scatter_nd(indices, updates, shape=(8,))
+    with self.test_session() as sess:
+      result = sess.run(scatter)
+      self.assertTrue(np.array_equal(result, expected[0]) or
+                      np.array_equal(result, expected[1]))
+
   def testRank3ValidShape(self):
     indices = array_ops.zeros([2, 2, 2], dtypes.int32)
     updates = array_ops.zeros([2, 2, 2], dtypes.int32)
@@ -584,6 +620,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest):
                 shape, dtype=updates.dtype))
     return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates)

+  def testString(self):
+    # Not supported yet.
+    pass
+
 if __name__ == "__main__":
   test.main()
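The duplicate-index cases mirror the documented numeric behavior: `tf.scatter_nd` sums updates that land on the same index, and for strings "sum" means concatenation, with no ordering guarantee between duplicates, hence the test accepting either "bc" or "cb". For comparison, the numeric analogue:

import tensorflow as tf

indices = tf.constant([[4], [3], [3], [7]])
updates = tf.constant([1, 2, 3, 4])
with tf.Session() as sess:
  # The duplicate index 3 accumulates: 2 + 3 = 5.
  print(sess.run(tf.scatter_nd(indices, updates, shape=[8])))
  # -> [0 0 0 5 1 0 0 4]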
@@ -196,7 +196,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
           array_ops.where(
               math_ops.logical_and(grad.indices >= start,
                                    grad.indices < end)),
-          squeeze_dims=[1])
+          axis=[1])
       new_indices = array_ops.gather(grad.indices, indices_to_select) - start
       new_values = array_ops.gather(grad.values, indices_to_select)
       out_grads.append(ops.IndexedSlices(new_values, new_indices, size))
@@ -994,9 +994,7 @@ def unstack(value, num=None, axis=0, name="unstack"):
   `value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`.
   Etc.

-  This is the opposite of stack.  The numpy equivalent is
-
-      tf.unstack(x, n) = np.unstack(x)
+  This is the opposite of stack.

   Args:
     value: A rank `R > 0` `Tensor` to be unstacked.
@@ -1720,7 +1718,9 @@ def placeholder(dtype, shape=None, name=None):
     print(sess.run(y, feed_dict={x: rand_array}))  # Will succeed.
   ```

-  @compatibility{eager} Placeholders are not compatible with eager execution.
+  @compatibility(eager)
+  Placeholders are not compatible with eager execution.
+  @end_compatibility

   Args:
     dtype: The type of elements in the tensor to be fed.
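The first docstring edit removes a bogus claim: NumPy has no `np.unstack`. If a NumPy analogue is wanted, the nearest idiom is split-and-squeeze (my sketch, not anything from the commit; for axis 0 a plain `list(x)` also works):

import numpy as np
import tensorflow as tf

x = np.arange(12).reshape(3, 4)
tf_pieces = tf.unstack(tf.constant(x), axis=0)             # three (4,) tensors
np_pieces = [np.squeeze(p, 0) for p in np.split(x, 3, 0)]  # numpy analogue
print(len(tf_pieces), [p.shape for p in np_pieces])        # 3 [(4,), (4,), (4,)]

The second edit just fixes the docstring markup: `@compatibility(...)` blocks use parentheses and need a closing `@end_compatibility` for the docs generator to render them.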
@@ -652,7 +652,7 @@ def pad_to_bounding_box(image, offset_height, offset_width, target_height,
   padded.set_shape(padded_shape)

   if not is_batch:
-    padded = array_ops.squeeze(padded, squeeze_dims=[0])
+    padded = array_ops.squeeze(padded, axis=[0])

   return padded
@@ -732,7 +732,7 @@ def crop_to_bounding_box(image, offset_height, offset_width, target_height,
   cropped.set_shape(cropped_shape)

   if not is_batch:
-    cropped = array_ops.squeeze(cropped, squeeze_dims=[0])
+    cropped = array_ops.squeeze(cropped, axis=[0])

   return cropped
@@ -849,7 +849,7 @@ def resize_image_with_crop_or_pad(image, target_height, target_width):
   resized = control_flow_ops.with_dependencies(assert_ops, resized)

   if not is_batch:
-    resized = array_ops.squeeze(resized, squeeze_dims=[0])
+    resized = array_ops.squeeze(resized, axis=[0])

   return resized
@@ -942,7 +942,7 @@ def resize_images(images,
           for x in [new_width_const, width, new_height_const, height]) and (
               width == new_width_const and height == new_height_const):
     if not is_batch:
-      images = array_ops.squeeze(images, squeeze_dims=[0])
+      images = array_ops.squeeze(images, axis=[0])
     return images

   if method == ResizeMethod.BILINEAR:
@@ -965,7 +965,7 @@ def resize_images(images,
     images.set_shape([None, new_height_const, new_width_const, None])

   if not is_batch:
-    images = array_ops.squeeze(images, squeeze_dims=[0])
+    images = array_ops.squeeze(images, axis=[0])
   return images
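All five call sites follow the same pattern: a 3-D HWC image is expanded to a 4-D batch for the underlying op, then the batch dimension is squeezed back off, now spelled with `axis=[0]`. A sketch of the shape round-trip, assuming the public `tf.image.pad_to_bounding_box` (which accepts 3-D images directly and does exactly this internally):

import tensorflow as tf

image = tf.zeros([32, 32, 3])                         # single HWC image
batched = tf.expand_dims(image, 0)                    # -> [1, 32, 32, 3]
padded = tf.image.pad_to_bounding_box(batched, 4, 4, 40, 40)
unbatched = tf.squeeze(padded, axis=[0])              # back to [40, 40, 3]
print(unbatched.shape)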
Some files were not shown because too many files have changed in this diff.