From a68bf787b3d14b5e9e6a64a641d36ac35d69fe8d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 06:46:58 -0800 Subject: [PATCH 01/20] Make it possible to override the node color in dot graphs via DotOptions. Change: 123212554 --- tensorflow/core/graph/dot.cc | 13 ++++++++++--- tensorflow/core/graph/dot.h | 5 +++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/dot.cc b/tensorflow/core/graph/dot.cc index 799bbe71475..a546b84ee13 100644 --- a/tensorflow/core/graph/dot.cc +++ b/tensorflow/core/graph/dot.cc @@ -32,7 +32,7 @@ static string GraphNodeName(const DotOptions& opts, const Node* n) { return strings::StrCat("N", n->id()); } -bool ShoulDisplayOpType(const Node* n) { +bool ShouldDisplayOpType(const Node* n) { if (n->type_string() == "NoOp") { return false; } @@ -125,7 +125,7 @@ string DotGraph(const Graph& g, const DotOptions& opts) { continue; } string label = src->name(); - if (ShoulDisplayOpType(src)) { + if (ShouldDisplayOpType(src)) { // Append the op type if it is not directly deducible from the op name. strings::StrAppend(&label, "\\n(", src->type_string(), ")"); } @@ -137,7 +137,14 @@ string DotGraph(const Graph& g, const DotOptions& opts) { shape = "oval"; } else { const string& d = src->assigned_device_name(); - const int dindex = (!d.empty()) ? device_index[d] : -1; + + int dindex; + if (opts.node_color) { + dindex = opts.node_color(src); + } else { + dindex = (!d.empty()) ? device_index[d] : -1; + } + if (dindex >= 0) { color = ColorFor(dindex); } diff --git a/tensorflow/core/graph/dot.h b/tensorflow/core/graph/dot.h index 79a538978a8..96e48773a9a 100644 --- a/tensorflow/core/graph/dot.h +++ b/tensorflow/core/graph/dot.h @@ -48,6 +48,11 @@ struct DotOptions { // A function that returns the "cost" of the edge. The dot display // makes a edge thickness proportional to its cost. std::function edge_cost; + + // A function that returns a color number to apply to each node. < 0 means + // no color. A color will be assigned to each color number from a palette; + // adjacent color numbers will receive different colors. + std::function node_color; }; // Return a string that contains a graphviz specification of the graph. From e3b3fd1b87f8ea57a2240edade03828c6ed89ef6 Mon Sep 17 00:00:00 2001 From: Josh Levenberg Date: Wed, 25 May 2016 09:17:27 -0800 Subject: [PATCH 02/20] Fix RandomShuffle for huge tensors. 
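The permutation was previously built with plain `int` indices, which overflow
once the outer dimension of the input exceeds kint32max; the kernel now
templates the index type and switches to int64 for larger tensors. For
illustration, a sketch of the Python op this kernel backs (assumes the
`tf.random_shuffle` API of this tree; not part of this change):

    import tensorflow as tf

    # Illustrative only. tf.random_shuffle permutes a tensor along its
    # first dimension; this fix matters once that dimension has more than
    # 2**31 - 1 entries, where 32-bit permutation indices would overflow.
    x = tf.constant([[1, 2], [3, 4], [5, 6]])
    shuffled = tf.random_shuffle(x, seed=42)

    with tf.Session() as sess:
        print(sess.run(shuffled))  # The rows of x in a random order.
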
Change: 123226210 --- tensorflow/core/kernels/random_shuffle_op.cc | 25 +++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/random_shuffle_op.cc b/tensorflow/core/kernels/random_shuffle_op.cc index d87883eae83..c81929de8d4 100644 --- a/tensorflow/core/kernels/random_shuffle_op.cc +++ b/tensorflow/core/kernels/random_shuffle_op.cc @@ -46,6 +46,19 @@ static inline void RandomShuffle(Iter first, Iter last, Random& uniform) { } } +template +static void IndexedShuffle(const int64 size, const InT& input_mat, + OutT output_mat, Random& uniform) { + std::vector permutation(size); + for (IntT i = 0; i < size; i++) { + permutation[i] = i; + } + RandomShuffle(permutation.begin(), permutation.end(), uniform); + for (IntT i = 0; i < size; i++) { + output_mat.template chip<0>(i) = input_mat.template chip<0>(permutation[i]); + } +} + template class RandomShuffleOp : public OpKernel { public: @@ -79,14 +92,10 @@ class RandomShuffleOp : public OpKernel { context->allocate_output(0, input.shape(), &output)); const auto input_mat = input.flat_outer_dims(); auto output_mat = output->flat_outer_dims(); - std::vector permutation(size); - for (int i = 0; i < size; i++) { - permutation[i] = i; - } - RandomShuffle(permutation.begin(), permutation.end(), uniform); - for (int i = 0; i < size; i++) { - output_mat.template chip<0>(i) = - input_mat.template chip<0>(permutation[i]); + if (size < kint32max) { + IndexedShuffle(size, input_mat, output_mat, uniform); + } else { + IndexedShuffle(size, input_mat, output_mat, uniform); } } } From 8515a76345e5660e7a521b938946a1b33f4556b9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 09:27:39 -0800 Subject: [PATCH 03/20] Use tf.GraphKeys.UPDATE_OPS as the default updates_collection for batch_norm. Change: 123227324 --- .../contrib/layers/python/layers/layers.py | 51 ++++--------------- .../layers/python/layers/layers_test.py | 30 +++++------ 2 files changed, 25 insertions(+), 56 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 261103a746f..de447847f21 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -39,6 +39,7 @@ from tensorflow.python.training import moving_averages # TODO(b/28426988): Remove legacy_* when all uses have migrated to new API. __all__ = ['bias_add', 'batch_norm', + 'conv2d', 'convolution2d', 'fully_connected', 'linear', @@ -113,7 +114,7 @@ def batch_norm(inputs, scale=False, epsilon=0.001, activation_fn=None, - updates_collection=None, + updates_collections=ops.GraphKeys.UPDATE_OPS, is_training=True, reuse=None, variables_collections=None, @@ -138,8 +139,9 @@ def batch_norm(inputs, disabled since the scaling can be done by the next layer. epsilon: small float added to variance to avoid dividing by zero. activation_fn: Optional activation function. - updates_collection: collection to collect the update ops for computation. If - None a control dependency would be added to make sure they are computed. + updates_collections: collections to collect the update ops for computation. + If None, a control dependency would be added to make sure the updates are + computed. is_training: whether or not the layer is in training mode. 
In training mode it would accumulate the statistics of the moments into `moving_mean` and `moving_variance` using an exponential moving average with the given @@ -207,7 +209,7 @@ def batch_norm(inputs, moving_mean, mean, decay) update_moving_variance = moving_averages.assign_moving_average( moving_variance, variance, decay) - if updates_collection is None: + if updates_collections is None: # Make sure the updates are computed here. with ops.control_dependencies([update_moving_mean, update_moving_variance]): @@ -215,8 +217,8 @@ def batch_norm(inputs, inputs, mean, variance, beta, gamma, epsilon) else: # Collect the updates to be computed later. - ops.add_to_collection(updates_collection, update_moving_mean) - ops.add_to_collection(updates_collection, update_moving_variance) + ops.add_to_collections(updates_collections, update_moving_mean) + ops.add_to_collections(updates_collections, update_moving_variance) outputs = nn.batch_normalization( inputs, mean, variance, beta, gamma, epsilon) else: @@ -504,22 +506,6 @@ def legacy_fully_connected(x, Raises: ValueError: if x has rank less than 2 or if its last dimension is not set. """ - # pylint: enable=anomalous-backslash-in-string -# TODO(ptucker) redirect to fully_connected -# _ = trainable -# variables_collections = {'weights': weight_collections, -# 'biases': bias_collections} -# outputs = fully_connected(inputs=x, -# num_outputs=num_output_units, -# activation_fn=activation_fn, -# weights_initializer=weight_init, -# weights_regularizer=weight_regularizer, -# biases_initializer=bias_init, -# biases_regularizer=bias_regularizer, -# variables_collections=variables_collections, -# scope=name) -# ops.add_to_collections(output_collections, outputs) -# return outputs with variable_scope.variable_op_scope([x], name, 'fully_connected'): dims = x.get_shape().dims if dims is None: @@ -645,24 +631,6 @@ def legacy_convolution2d(x, Raises: ValueError: If `kernel_size` or `stride` are not length 2. """ -# TODO(ptucker) redirect to convolution2d -# _ = trainable -# variables_collections = {'weights': weight_collections, -# 'biases': bias_collections} -# outputs = convolution2d(inputs=x, -# num_outputs=num_output_channels, -# kernel_size=kernel_size, -# stride=stride, -# padding=padding, -# activation_fn=activation_fn, -# weights_initializer=weight_init, -# weights_regularizer=weight_regularizer, -# biases_initializer=bias_init, -# biases_regularizer=bias_regularizer, -# variables_collections=variables_collections, -# scope=name) -# ops.add_to_collections(output_collections, outputs) -# return outputs with variable_scope.variable_op_scope([x], name, 'convolution2d'): num_input_channels = x.get_shape().dims[3].value @@ -714,3 +682,6 @@ linear = legacy_linear relu = legacy_relu relu6 = legacy_relu6 +# Simple alias for convolution2d. 
+conv2d = convolution2d + diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index de073e573eb..0c3be3c98f7 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -430,8 +430,8 @@ class BatchNormTest(tf.test.TestCase): height, width = 3, 3 with self.test_session(): images = tf.random_uniform((5, height, width, 3), seed=1) - tf.contrib.layers.batch_norm(images, updates_collection='update_ops') - update_layers = tf.get_collection('update_ops') + tf.contrib.layers.batch_norm(images, updates_collections='my_update_ops') + update_layers = tf.get_collection('my_update_ops') update_moving_mean = update_layers[0] update_moving_variance = update_layers[1] self.assertEquals(update_moving_mean.op.name, @@ -460,7 +460,7 @@ class BatchNormTest(tf.test.TestCase): with self.test_session(): images = tf.random_uniform((5, height, width, 3), seed=1) with tf.contrib.framework.arg_scope([tf.contrib.layers.batch_norm], - updates_collection='update_ops'): + updates_collections='update_ops'): tf.contrib.layers.batch_norm(images, scope='bn') self.assertEquals(len(tf.get_collection('update_ops')), 2) tf.contrib.layers.batch_norm(images, scope='bn', reuse=True) @@ -479,7 +479,7 @@ class BatchNormTest(tf.test.TestCase): self.assertEquals(len(moving_variance), 1) self.assertEquals(moving_variance[0].op.name, 'BatchNorm/moving_variance') - def testUpdateMovingVars(self): + def testForceUpdateMovingVars(self): height, width = 3, 3 with self.test_session() as sess: image_shape = (10, height, width, 3) @@ -487,7 +487,8 @@ class BatchNormTest(tf.test.TestCase): expected_mean = np.mean(image_values, axis=(0, 1, 2)) expected_var = np.var(image_values, axis=(0, 1, 2)) images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) - output = tf.contrib.layers.batch_norm(images, decay=0.1) + output = tf.contrib.layers.batch_norm(images, decay=0.1, + updates_collections=None) # Initialize all variables sess.run(tf.initialize_all_variables()) moving_mean = tf.contrib.framework.get_variables( @@ -515,9 +516,8 @@ class BatchNormTest(tf.test.TestCase): expected_mean = np.mean(image_values, axis=(0, 1, 2)) expected_var = np.var(image_values, axis=(0, 1, 2)) images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) - output = tf.contrib.layers.batch_norm(images, decay=0.1, - updates_collection='update_ops') - update_ops = tf.get_collection('update_ops') + output = tf.contrib.layers.batch_norm(images, decay=0.1) + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): barrier = tf.no_op(name='barrier') output = control_flow_ops.with_dependencies([barrier], output) @@ -550,10 +550,9 @@ class BatchNormTest(tf.test.TestCase): images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) output = tf.contrib.layers.batch_norm(images, decay=0.1, - is_training=False, - updates_collection='update_ops') - update_layers = tf.get_collection('update_ops') - self.assertEquals(update_layers, []) + is_training=False) + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + self.assertEquals(update_ops, []) # Initialize all variables sess.run(tf.initialize_all_variables()) moving_mean = tf.contrib.framework.get_variables( @@ -587,10 +586,9 @@ class BatchNormTest(tf.test.TestCase): images = tf.constant(image_values, shape=image_shape, dtype=tf.float32) output = tf.contrib.layers.batch_norm(images, decay=0.1, - 
is_training=False, - updates_collection='update_ops') - update_layers = tf.get_collection('update_ops') - self.assertEquals(update_layers, []) + is_training=False) + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + self.assertEquals(update_ops, []) # Initialize all variables sess.run(tf.initialize_all_variables()) moving_mean = tf.contrib.framework.get_variables( From a9f3979264e649122e19ce8adedc4022fd6627ed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 10:15:48 -0800 Subject: [PATCH 04/20] StreamExecutor add CUDA support for cudnnConvolutionBackwardBias Change: 123233121 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 67 +++++++++++++++++++++ tensorflow/stream_executor/cuda/cuda_dnn.h | 26 ++++++++ tensorflow/stream_executor/dnn.h | 37 ++++++++++++ tensorflow/stream_executor/stream.cc | 51 ++++++++++++++++ tensorflow/stream_executor/stream.h | 24 ++++++++ 5 files changed, 205 insertions(+) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 8f2b3d1c7c2..15aeee645c6 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -184,6 +184,7 @@ bool IsCudnnR2() { __macro(cudnnSetStream) \ __macro(cudnnActivationForward) \ __macro(cudnnConvolutionForward) \ + __macro(cudnnConvolutionBackwardBias) \ __macro(cudnnGetConvolutionForwardWorkspaceSize) \ __macro(cudnnTransformTensor) \ __macro(cudnnSetConvolutionNdDescriptor) \ @@ -1493,6 +1494,72 @@ bool CudnnSupport::DoConvolveBackwardFilter( algorithm, output_profile_result); } +template +bool CudnnSupport::DoConvolveBackwardBiasImpl( + Stream* stream, int cudnn_type, // Actually cudnnDataType_t. + const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + mutex_lock lock{dnn_handle_mutex_}; + auto status = dynload::cudnnSetStream(parent_, ToHandle(dnn_handle_), + AsCUDAStreamValue(stream)); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); + } + + ScopedTensorDescriptor input_nd{parent_, input_descriptor, + static_cast(cudnn_type)}; + ScopedTensorDescriptor bias_nd{parent_, bias_descriptor, + static_cast(cudnn_type)}; + + // Alpha is the scaling factor for input. + float alpha = 1.0; + // Beta is the scaling factor for output. 
+ float beta = 0.0; + + status = dynload::cudnnConvolutionBackwardBias( + parent_, ToHandle(dnn_handle_), &alpha, input_nd.handle(), + input_data.opaque(), &beta, bias_nd.handle(), + backward_bias_data->opaque()); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "failed to enqueue backward convolution on stream: " + << ToString(status); + return false; + } + return true; +} + +bool CudnnSupport::DoConvolveBackwardBias( + Stream* stream, const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_DOUBLE, input_descriptor, + input_data, bias_descriptor, + backward_bias_data); +} + +bool CudnnSupport::DoConvolveBackwardBias( + Stream* stream, const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_FLOAT, input_descriptor, + input_data, bias_descriptor, + backward_bias_data); +} + +bool CudnnSupport::DoConvolveBackwardBias( + Stream* stream, const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + return DoConvolveBackwardBiasImpl(stream, CUDNN_DATA_HALF, input_descriptor, + input_data, bias_descriptor, + backward_bias_data); +} + bool CudnnSupport::DoMatMul(Stream* stream, const DeviceMemory& input_data, const DeviceMemory& weights, diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index 9388969770d..e3c9175e019 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -140,6 +140,24 @@ class CudnnSupport : public dnn::DnnSupport { ScratchAllocator* scratch_allocator, dnn::AlgorithmType algorithm, dnn::ProfileResult* output_profile_result) override; + bool DoConvolveBackwardBias( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) override; + + bool DoConvolveBackwardBias(Stream* stream, + const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) override; + + bool DoConvolveBackwardBias( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) override; + bool DoMatMul(Stream* stream, const DeviceMemory& input_data, const DeviceMemory& weights, const dnn::BatchDescriptor& input_dimensions, @@ -311,6 +329,14 @@ class CudnnSupport : public dnn::DnnSupport { dnn::AlgorithmType algorithm, dnn::ProfileResult* output_profile_result); + template + bool DoConvolveBackwardBiasImpl(Stream* stream, + int cudnn_type, // Actually cudnnDataType_t. 
+ const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data); + SE_DISALLOW_COPY_AND_ASSIGN(CudnnSupport); }; diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 01c457c90c7..6eaadcadc20 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -849,6 +849,43 @@ class DnnSupport { ScratchAllocator* scratch_allocator, AlgorithmType algorithm, ProfileResult* output_profile_result) = 0; + // Enqueues a single-precision backward convolution (for bias) operation onto + // the stream. + // + // Arguments: + // stream: borrowed pointer to the stream that the 'convolve' operation + // should be enqueued onto. + // input_descriptor: dimensions of the input layer. + // input_data: un-owned device memory region which contains the + // convolution input. + // bias_descriptor: dimensions of the bias tensor. Should be the same as the + // input dimensions, but with the spatial dimensions set to 1. + // backward_filter_data: un-owned device memory region in which to place the + // backprop of the bias. + virtual bool DoConvolveBackwardBias(Stream* stream, + const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + return false; + } + + virtual bool DoConvolveBackwardBias( + Stream* stream, const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + return false; + } + + virtual bool DoConvolveBackwardBias( + Stream* stream, const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& bias_descriptor, + DeviceMemory* backward_bias_data) { + return false; + } + // Fully connects the "nodes" (float values) in input_data with // shape input_dimensions to output_data with output_dimensions // using provided weights. 
This is equivalent to computing a matrix diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 57a10b84f3b..3d264989026 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -741,6 +741,57 @@ Stream &Stream::ThenConvolveBackwardFilter( /*scratch_allocator=*/nullptr); } +template +Stream &Stream::ThenConvolveBackwardBiasImpl( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), PARAM(bias_descriptor), + PARAM(backward_bias_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoConvolveBackwardBias(this, input_descriptor, input_data, + bias_descriptor, + backward_bias_data)); + } else { + SetError(); + LOG(WARNING) + << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + } + return *this; +} + +Stream &Stream::ThenConvolveBackwardBias( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data) { + return ThenConvolveBackwardBiasImpl(input_descriptor, input_data, + bias_descriptor, backward_bias_data); +} + +Stream &Stream::ThenConvolveBackwardBias( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data) { + return ThenConvolveBackwardBiasImpl(input_descriptor, input_data, + bias_descriptor, backward_bias_data); +} + +Stream &Stream::ThenConvolveBackwardBias( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data) { + return ThenConvolveBackwardBiasImpl(input_descriptor, input_data, + bias_descriptor, backward_bias_data); +} + Stream &Stream::ThenMatMul(const DeviceMemory &input_data, const DeviceMemory &weights, const dnn::BatchDescriptor &input_dimensions, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index f5583d62215..b14bf06cdc8 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -371,6 +371,22 @@ class Stream { ScratchAllocator *scratch_allocator, dnn::AlgorithmType algorithm, dnn::ProfileResult *output_profile_result); + Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data); + + Stream &ThenConvolveBackwardBias(const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data); + + Stream &ThenConvolveBackwardBias( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data); + Stream &ThenMatMul(const DeviceMemory &input_data, const DeviceMemory &weights, const dnn::BatchDescriptor &input_dimensions, @@ -1439,6 +1455,14 @@ class Stream { // BlockHostUntilDone() is called. internal::TemporaryMemoryManager temporary_memory_manager_; + // Implementation of ThenConvolveBackwardBias that is shared by all types. 
+ template + Stream &ThenConvolveBackwardBiasImpl( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &bias_descriptor, + DeviceMemory *backward_bias_data); + SE_DISALLOW_COPY_AND_ASSIGN(Stream); }; From b65ca18eb8dface5528f760e1c3a1ea6775de3ca Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 May 2016 11:04:03 -0800 Subject: [PATCH 05/20] Upgraded to the latest version of Eigen that supports convolutions on fp16 Change: 123238579 --- eigen.BUILD | 2 +- tensorflow/contrib/cmake/external/eigen.cmake | 4 ++-- tensorflow/workspace.bzl | 4 ++-- third_party/eigen3/Eigen/Cholesky | 2 +- third_party/eigen3/Eigen/Core | 2 +- third_party/eigen3/Eigen/Eigenvalues | 2 +- third_party/eigen3/Eigen/LU | 2 +- third_party/eigen3/Eigen/QR | 2 +- third_party/eigen3/unsupported/Eigen/CXX11/Tensor | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/eigen.BUILD b/eigen.BUILD index a657493380b..16dd4f84228 100644 --- a/eigen.BUILD +++ b/eigen.BUILD @@ -1,6 +1,6 @@ package(default_visibility = ["//visibility:public"]) -archive_dir = "eigen-eigen-a5e9085a94e8" +archive_dir = "eigen-eigen-f3a13643ac1f" cc_library( name = "eigen", diff --git a/tensorflow/contrib/cmake/external/eigen.cmake b/tensorflow/contrib/cmake/external/eigen.cmake index 42fa7686632..c1929a10f32 100644 --- a/tensorflow/contrib/cmake/external/eigen.cmake +++ b/tensorflow/contrib/cmake/external/eigen.cmake @@ -7,7 +7,7 @@ include (ExternalProject) -set(eigen_archive_hash "a5e9085a94e8") +set(eigen_archive_hash "f3a13643ac1f") set(eigen_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} @@ -16,7 +16,7 @@ set(eigen_INCLUDE_DIRS ${tensorflow_source_dir}/third_party/eigen3 ) set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_archive_hash}.tar.gz) -set(eigen_HASH SHA256=967126237829c7c87abb6cd0e13a5a235b0377d51575522c390b9486aed13e71) +set(eigen_HASH SHA256=a9266e60366cddb371a23d86b11a297eee86372a89ef4b38a3509012f9cc37ec) set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen) set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 7c68fb763fa..b95f84ce5e4 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -13,8 +13,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): native.new_http_archive( name = "eigen_archive", - url = "https://bitbucket.org/eigen/eigen/get/a5e9085a94e8.tar.gz", - sha256 = "967126237829c7c87abb6cd0e13a5a235b0377d51575522c390b9486aed13e71", + url = "https://bitbucket.org/eigen/eigen/get/f3a13643ac1f.tar.gz", + sha256 = "a9266e60366cddb371a23d86b11a297eee86372a89ef4b38a3509012f9cc37ec", build_file = path_prefix + "eigen.BUILD", ) diff --git a/third_party/eigen3/Eigen/Cholesky b/third_party/eigen3/Eigen/Cholesky index ca263316709..7b196a89043 100644 --- a/third_party/eigen3/Eigen/Cholesky +++ b/third_party/eigen3/Eigen/Cholesky @@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/Eigen/Cholesky" +#include "eigen-eigen-f3a13643ac1f/Eigen/Cholesky" diff --git a/third_party/eigen3/Eigen/Core b/third_party/eigen3/Eigen/Core index 1e6ac595cc5..97361e51834 100644 --- a/third_party/eigen3/Eigen/Core +++ b/third_party/eigen3/Eigen/Core @@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/Eigen/Core" +#include "eigen-eigen-f3a13643ac1f/Eigen/Core" diff --git a/third_party/eigen3/Eigen/Eigenvalues b/third_party/eigen3/Eigen/Eigenvalues index 480d9079b03..a5f98ed8702 100644 --- a/third_party/eigen3/Eigen/Eigenvalues +++ b/third_party/eigen3/Eigen/Eigenvalues 
@@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/Eigen/Eigenvalues" +#include "eigen-eigen-f3a13643ac1f/Eigen/Eigenvalues" diff --git a/third_party/eigen3/Eigen/LU b/third_party/eigen3/Eigen/LU index 0e82ebb8fc9..5172aece6cf 100644 --- a/third_party/eigen3/Eigen/LU +++ b/third_party/eigen3/Eigen/LU @@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/Eigen/LU" +#include "eigen-eigen-f3a13643ac1f/Eigen/LU" diff --git a/third_party/eigen3/Eigen/QR b/third_party/eigen3/Eigen/QR index 13562bca3cd..bd59f7adf20 100644 --- a/third_party/eigen3/Eigen/QR +++ b/third_party/eigen3/Eigen/QR @@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/Eigen/QR" +#include "eigen-eigen-f3a13643ac1f/Eigen/QR" diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor index a9b263f5ae3..8d363c3845f 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor +++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor @@ -1 +1 @@ -#include "eigen-eigen-a5e9085a94e8/unsupported/Eigen/CXX11/Tensor" +#include "eigen-eigen-f3a13643ac1f/unsupported/Eigen/CXX11/Tensor" From 468aff2fc6133cfed3cac44c11aa2b96d0e220ad Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 May 2016 11:11:26 -0800 Subject: [PATCH 06/20] Made the spatial convolution code usable from gpu Change: 123239331 --- .../core/kernels/eigen_spatial_convolutions.h | 78 +++++++++++-------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h index a99bb6a092d..774436bacd8 100644 --- a/tensorflow/core/kernels/eigen_spatial_convolutions.h +++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h @@ -61,6 +61,7 @@ class TensorContractionInputMapper< typedef SubMapper LinearMapper; typedef typename packet_traits::type Packet; + EIGEN_DEVICE_FUNC TensorContractionInputMapper( const TensorEvaluator< const TensorReshapingOp< @@ -77,7 +78,7 @@ class TensorContractionInputMapper< m_patch_cols = tensor.impl().dimensions()[2]; m_num_patches = tensor.impl().dimensions()[3]; } else { - static const int NumDims = tensor.impl().dimensions().size(); + const int NumDims = tensor.impl().dimensions().size(); patch_depth = tensor.impl().dimensions()[NumDims - 1]; patch_rows = tensor.impl().dimensions()[NumDims - 2]; m_patch_cols = tensor.impl().dimensions()[NumDims - 3]; @@ -99,7 +100,7 @@ class TensorContractionInputMapper< m_inputRows = tensor.impl().impl().dimensions()[1]; m_inputCols = tensor.impl().impl().dimensions()[2]; } else { - static const int NumDims = tensor.impl().impl().dimensions().size(); + const int NumDims = tensor.impl().impl().dimensions().size(); m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2]; m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3]; } @@ -121,6 +122,7 @@ class TensorContractionInputMapper< m_fastDimZero = internal::TensorIntDivisor(patch_depth); } + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) : m_impl(base_mapper.m_impl) { m_patch_cols = base_mapper.m_patch_cols; @@ -650,8 +652,10 @@ struct gemm_pack_rhs< SubMapper; typedef SubMapper DataMapper; + EIGEN_DEVICE_FUNC static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; } + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride = 0, Index offset = 0) const { @@ -822,8 +826,10 @@ struct gemm_pack_rhs< SubMapper; typedef SubMapper DataMapper; + EIGEN_DEVICE_FUNC 
static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; } + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride = 0, Index offset = 0) const { @@ -898,36 +904,40 @@ struct gemm_pack_rhs< * */ template -EIGEN_ALWAYS_INLINE static const typename internal::conditional< - internal::traits::Layout == ColMajor, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const Kernel>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorImagePatchOp > > >, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorImagePatchOp >, - const TensorReshapingOp< - const DSizes::Index, 2>, - const Kernel> > > >::type -SpatialConvolution(const Input& input, const Kernel& kernel, - const DenseIndex row_stride = 1, - const DenseIndex col_stride = 1, - const PaddingType padding_type = PADDING_SAME, - const DenseIndex row_in_stride = 1, - const DenseIndex col_in_stride = 1) { +EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > > >, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel> > > >::type + SpatialConvolution(const Input& input, const Kernel& kernel, + const DenseIndex row_stride = 1, + const DenseIndex col_stride = 1, + const PaddingType padding_type = PADDING_SAME, + const DenseIndex row_in_stride = 1, + const DenseIndex col_in_stride = 1) { typedef typename internal::traits::Index TensorIndex; TensorRef::Scalar, internal::traits::NumDimensions, @@ -941,9 +951,9 @@ SpatialConvolution(const Input& input, const Kernel& kernel, EIGEN_STATIC_ASSERT( internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - static const bool isColMajor = (internal::traits::Layout == ColMajor); + const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int NumDims = internal::traits::NumDimensions; + const int NumDims = internal::traits::NumDimensions; // Number of filters to apply. This is the same as the output depth of the // result From 13dc98fd9587c5ad2423757de38a44251c87db56 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 12:11:14 -0800 Subject: [PATCH 07/20] Documentation typo: `tf.float` should be `tf.float32` in `parse_example`. 
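`tf.float` is not a TensorFlow dtype, so the documented example would raise an
AttributeError as written; `tf.float32` is what `VarLenFeature` expects for
float features. A small end-to-end sketch of the corrected usage (illustrative
only; the feature name and values are made up):

    import tensorflow as tf

    # Build one serialized Example proto with a float feature named "gps".
    example = tf.train.Example(features=tf.train.Features(feature={
        "gps": tf.train.Feature(
            float_list=tf.train.FloatList(value=[37.4, -122.1])),
    }))
    parsed = tf.parse_example(
        tf.constant([example.SerializeToString()]),
        features={"gps": tf.VarLenFeature(tf.float32)})  # not tf.float

    with tf.Session() as sess:
        # parsed["gps"] is a SparseTensor; its values are the floats above.
        print(sess.run(parsed["gps"]).values)
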
Change: 123244915 --- tensorflow/python/ops/parsing_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index fa6696cbbc0..9d3a135cf0c 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -225,7 +225,7 @@ def parse_example(serialized, features, name=None, example_names=None): features: { "kw": VarLenFeature(tf.string), "dank": VarLenFeature(tf.int64), - "gps": VarLenFeature(tf.float), + "gps": VarLenFeature(tf.float32), } ``` From 8074e98b20c425b3ca4f51a4cba2cbad35d0413d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 12:39:30 -0800 Subject: [PATCH 08/20] Pass -O3 when building tensorflow with clang -c opt. (This isn't hooked up to the OSS build yet, we're working on it.) Change: 123248081 --- tensorflow/tensorflow.bzl | 5 +++++ third_party/gpus/cuda/BUILD | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index acc01eae849..1e73e00a3f4 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -314,6 +314,11 @@ def _cuda_copts(): "--cuda-gpu-arch=sm_35", ] ), + }) + select({ + # Pass -O3 when building CUDA code with clang; some important + # optimizations are not enabled at O2. + "//third_party/gpus/cuda:using_clang_opt": ["-O3"], + "//conditions:default": [], }) # Build defs for TensorFlow kernels diff --git a/third_party/gpus/cuda/BUILD b/third_party/gpus/cuda/BUILD index a0d1d6561b0..b68104385d6 100644 --- a/third_party/gpus/cuda/BUILD +++ b/third_party/gpus/cuda/BUILD @@ -31,6 +31,15 @@ config_setting( }, ) +# Equivalent to using_clang && -c opt. +config_setting( + name = "using_clang_opt", + values = { + "define": "using_cuda_clang=true", + "compilation_mode": "opt", + }, +) + config_setting( name = "darwin", values = {"cpu": "darwin"}, From 606fbb46eb20c795eacd9bec056df062c6760792 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 12:41:42 -0800 Subject: [PATCH 09/20] BUGFIX: Call n = convert_to_tensor(n). Exponential.sample forgot to call n = convert_to_tensor(n), and tensor_util.constant_value(n) only works with n a Tensor. 
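With the conversion in place, callers can again pass a plain Python integer
for n, as the updated test does. A minimal sketch of the call this enables
(illustrative; mirrors the contrib API exercised by the test below):

    import tensorflow as tf

    dist = tf.contrib.distributions.Exponential(lam=tf.constant([1.0, 2.0]))
    # n is a Python int; sample() now converts it to a Tensor itself, so
    # tensor_util.constant_value(n) can recover it for static shape inference.
    samples = dist.sample(100, seed=138)
    print(samples.get_shape())  # (100, 2)
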
Change: 123248349 --- .../distributions/python/kernel_tests/exponential_test.py | 5 ++--- tensorflow/contrib/distributions/python/ops/exponential.py | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py b/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py index 5e3fed1ed80..6fd03e90bf6 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/exponential_test.py @@ -105,10 +105,9 @@ class ExponentialTest(tf.test.TestCase): exponential = tf.contrib.distributions.Exponential(lam=lam) - n_v = 100000 - n = tf.constant(n_v) + n = 100000 samples = exponential.sample(n, seed=138) - self.assertEqual(samples.get_shape(), (n_v, batch_size, 2)) + self.assertEqual(samples.get_shape(), (n, batch_size, 2)) sample_values = samples.eval() diff --git a/tensorflow/contrib/distributions/python/ops/exponential.py b/tensorflow/contrib/distributions/python/ops/exponential.py index b80632fc496..4a93c210b91 100644 --- a/tensorflow/contrib/distributions/python/ops/exponential.py +++ b/tensorflow/contrib/distributions/python/ops/exponential.py @@ -70,6 +70,7 @@ class Exponential(gamma.Gamma): """ broadcast_shape = self._lam.get_shape() with ops.op_scope([self.lam, n], name, "ExponentialSample"): + n = ops.convert_to_tensor(n, name="n") shape = array_ops.concat( 0, [array_ops.pack([n]), array_ops.shape(self._lam)]) sampled = random_ops.random_uniform( From fd5ebfa76850b4870f7ab4929616a60852a2800e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 12:50:40 -0800 Subject: [PATCH 10/20] Update generated Python Op docs. Change: 123249409 --- .../api_docs/python/functions_and_classes/tf.parse_example.md | 2 +- tensorflow/g3doc/api_docs/python/io_ops.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md index 9a7476475ec..2f2f5111963 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/tf.parse_example.md @@ -74,7 +74,7 @@ example_names: ["input0", "input1"], features: { "kw": VarLenFeature(tf.string), "dank": VarLenFeature(tf.int64), - "gps": VarLenFeature(tf.float), + "gps": VarLenFeature(tf.float32), } ``` diff --git a/tensorflow/g3doc/api_docs/python/io_ops.md b/tensorflow/g3doc/api_docs/python/io_ops.md index 127b461e4d2..61d01910524 100644 --- a/tensorflow/g3doc/api_docs/python/io_ops.md +++ b/tensorflow/g3doc/api_docs/python/io_ops.md @@ -1289,7 +1289,7 @@ example_names: ["input0", "input1"], features: { "kw": VarLenFeature(tf.string), "dank": VarLenFeature(tf.int64), - "gps": VarLenFeature(tf.float), + "gps": VarLenFeature(tf.float32), } ``` From 5c145f0e3cabc7e61440ddf32c1ac28f5b9d499e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 13:01:01 -0800 Subject: [PATCH 11/20] Fix copyrights and a few other lint errors. 
Change: 123250570 --- .../contrib/learn/python/learn/__init__.py | 5 +- .../learn/python/learn/datasets/base.py | 5 +- .../learn/python/learn/estimators/__init__.py | 9 +- .../learn/python/learn/estimators/_sklearn.py | 21 +- .../python/learn/estimators/autoencoder.py | 189 ++++++++-------- .../learn/python/learn/estimators/base.py | 10 +- .../learn/python/learn/estimators/dnn.py | 6 +- .../learn/python/learn/estimators/linear.py | 6 +- .../learn/python/learn/estimators/rnn.py | 6 +- .../learn/python/learn/io/data_feeder.py | 6 +- .../contrib/learn/python/learn/models.py | 70 +++--- .../contrib/learn/python/learn/monitors.py | 5 +- tensorflow/examples/skflow/boston.py | 2 +- tensorflow/examples/skflow/iris.py | 2 +- .../examples/skflow/iris_custom_decay_dnn.py | 2 +- tensorflow/examples/skflow/mnist.py | 2 +- tensorflow/examples/skflow/resnet.py | 204 +++++++++--------- 17 files changed, 294 insertions(+), 256 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py index 8de7797e6b7..1d72243f992 100644 --- a/tensorflow/contrib/learn/python/learn/__init__.py +++ b/tensorflow/contrib/learn/python/learn/__init__.py @@ -1,5 +1,4 @@ -"""Main Scikit Flow module.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""High level API for learning with TensorFlow.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py index 7f78b2dced9..9c29b9eeb11 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/base.py +++ b/tensorflow/contrib/learn/python/learn/datasets/base.py @@ -1,5 +1,4 @@ -"""Base utilities for loading datasets.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Base utilities for loading datasets.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index e714c15f2e0..1b0d0aef6f5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -1,5 +1,4 @@ -"""Scikit Flow Estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,12 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +"""Estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.learn.python.learn.estimators.autoencoder import TensorFlowDNNAutoencoder -from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowEstimator, TensorFlowBaseTransformer +from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowBaseTransformer +from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowEstimator from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNClassifier from tensorflow.contrib.learn.python.learn.estimators.dnn import DNNRegressor from tensorflow.contrib.learn.python.learn.estimators.dnn import TensorFlowDNNClassifier diff --git a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py index dcd1d81056b..5032ea966d4 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py @@ -1,5 +1,4 @@ -"""sklearn cross-support.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""sklearn cross-support.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -20,6 +22,8 @@ import collections import os import numpy as np +import six + def _pprint(d): return ', '.join(['%s=%s' % (key, str(value)) for key, value in d.items()]) @@ -102,6 +106,7 @@ class _BaseEstimator(object): _pprint(self.get_params(deep=False)),) +# pylint: disable=old-style-class class _ClassifierMixin(): """Mixin class for all classifiers.""" pass @@ -111,8 +116,10 @@ class _RegressorMixin(): """Mixin class for all regression estimators.""" pass + class _TransformerMixin(): - """Mixin class for all transformer estimators.""" + """Mixin class for all transformer estimators.""" + class _NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. @@ -134,6 +141,8 @@ class _NotFittedError(ValueError, AttributeError): https://github.com/scikit-learn/scikit-learn/master/sklearn/exceptions.py """ +# pylint: enable=old-style-class + def _accuracy_score(y_true, y_pred): score = y_true == y_pred @@ -149,8 +158,7 @@ def _mean_squared_error(y_true, y_pred): def _train_test_split(*args, **options): - n_array = len(args) - + # pylint: disable=missing-docstring test_size = options.pop('test_size', None) train_size = options.pop('train_size', None) random_state = options.pop('random_state', None) @@ -159,7 +167,7 @@ def _train_test_split(*args, **options): train_size = 0.75 elif train_size is None: train_size = 1 - test_size - train_size = train_size * args[0].shape[0] + train_size *= args[0].shape[0] np.random.seed(random_state) indices = np.random.permutation(args[0].shape[0]) @@ -173,6 +181,7 @@ def _train_test_split(*args, **options): # If "TENSORFLOW_SKLEARN" flag is defined then try to import from sklearn. 
TRY_IMPORT_SKLEARN = os.environ.get('TENSORFLOW_SKLEARN', False) if TRY_IMPORT_SKLEARN: + # pylint: disable=g-import-not-at-top,g-multiple-import,unused-import from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin from sklearn.metrics import accuracy_score, log_loss, mean_squared_error from sklearn.cross_validation import train_test_split diff --git a/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py b/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py index 690bac8f196..a3f41697680 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py +++ b/tensorflow/contrib/learn/python/learn/estimators/autoencoder.py @@ -1,5 +1,4 @@ -"""Deep Autoencoder estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,105 +11,115 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Deep Autoencoder estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.ops import nn -from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowBaseTransformer +import numpy as np + from tensorflow.contrib.learn.python.learn import models +from tensorflow.contrib.learn.python.learn.estimators.base import TensorFlowBaseTransformer +from tensorflow.python.ops import nn class TensorFlowDNNAutoencoder(TensorFlowBaseTransformer): - """TensorFlow Autoencoder Regressor model. + """TensorFlow Autoencoder Regressor model. - Parameters: - hidden_units: List of hidden units per layer. - batch_size: Mini batch size. - activation: activation function used to map inner latent layer onto - reconstruction layer. - add_noise: a function that adds noise to tensor_in, - e.g. def add_noise(x): - return(x + np.random.normal(0, 0.1, (len(x), len(x[0])))) - steps: Number of steps to run over data. - optimizer: Optimizer name (or class), for example "SGD", "Adam", - "Adagrad". - learning_rate: If this is constant float value, no decay function is used. - Instead, a customized decay function can be passed that accepts - global_step as parameter and returns a Tensor. - e.g. exponential decay function: - def exp_decay(global_step): - return tf.train.exponential_decay( - learning_rate=0.1, global_step, - decay_steps=2, decay_rate=0.001) - continue_training: when continue_training is True, once initialized - model will be continuely trained on every call of fit. - config: RunConfig object that controls the configurations of the session, - e.g. num_cores, gpu_memory_fraction, etc. - verbose: Controls the verbosity, possible values: - 0: the algorithm and debug information is muted. - 1: trainer prints the progress. - 2: log device placement is printed. - dropout: When not None, the probability we will drop out a given - coordinate. 
- """ - def __init__(self, hidden_units, n_classes=0, batch_size=32, - steps=200, optimizer="Adagrad", learning_rate=0.1, - clip_gradients=5.0, activation=nn.relu, add_noise=None, - continue_training=False, config=None, - verbose=1, dropout=None): - self.hidden_units = hidden_units - self.dropout = dropout - self.activation = activation - self.add_noise = add_noise - super(TensorFlowDNNAutoencoder, self).__init__( - model_fn=self._model_fn, - n_classes=n_classes, - batch_size=batch_size, steps=steps, optimizer=optimizer, - learning_rate=learning_rate, clip_gradients=clip_gradients, - continue_training=continue_training, - config=config, verbose=verbose) + Parameters: + hidden_units: List of hidden units per layer. + batch_size: Mini batch size. + activation: activation function used to map inner latent layer onto + reconstruction layer. + add_noise: a function that adds noise to tensor_in, + e.g. def add_noise(x): + return(x + np.random.normal(0, 0.1, (len(x), len(x[0])))) + steps: Number of steps to run over data. + optimizer: Optimizer name (or class), for example "SGD", "Adam", + "Adagrad". + learning_rate: If this is constant float value, no decay function is used. + Instead, a customized decay function can be passed that accepts + global_step as parameter and returns a Tensor. + e.g. exponential decay function: + def exp_decay(global_step): + return tf.train.exponential_decay( + learning_rate=0.1, global_step, + decay_steps=2, decay_rate=0.001) + continue_training: when continue_training is True, once initialized + model will be continuely trained on every call of fit. + config: RunConfig object that controls the configurations of the session, + e.g. num_cores, gpu_memory_fraction, etc. + verbose: Controls the verbosity, possible values: + 0: the algorithm and debug information is muted. + 1: trainer prints the progress. + 2: log device placement is printed. + dropout: When not None, the probability we will drop out a given + coordinate. 
+ """ - def _model_fn(self, X, y): - encoder, decoder, autoencoder_estimator = models.get_autoencoder_model( - self.hidden_units, - models.linear_regression, - activation=self.activation, - add_noise=self.add_noise, - dropout=self.dropout)(X) - self.encoder = encoder - self.decoder = decoder - return autoencoder_estimator + def __init__(self, hidden_units, n_classes=0, batch_size=32, + steps=200, optimizer="Adagrad", learning_rate=0.1, + clip_gradients=5.0, activation=nn.relu, add_noise=None, + continue_training=False, config=None, + verbose=1, dropout=None): + self.hidden_units = hidden_units + self.dropout = dropout + self.activation = activation + self.add_noise = add_noise + super(TensorFlowDNNAutoencoder, self).__init__( + model_fn=self._model_fn, + n_classes=n_classes, + batch_size=batch_size, steps=steps, optimizer=optimizer, + learning_rate=learning_rate, clip_gradients=clip_gradients, + continue_training=continue_training, + config=config, verbose=verbose) - def generate(self, hidden=None): - """Generate new data using trained construction layer""" - if hidden is None: - last_layer = len(self.hidden_units) - 1 - bias = self.get_tensor_value('encoder/dnn/layer%d/Linear/Bias:0' % last_layer) - import numpy as np - hidden = np.random.normal(size=bias.shape) - hidden = np.reshape(hidden, (1, len(hidden))) - return self._session.run(self.decoder, feed_dict={self.encoder: hidden}) + def _model_fn(self, X, y): + encoder, decoder, autoencoder_estimator = models.get_autoencoder_model( + self.hidden_units, + models.linear_regression, + activation=self.activation, + add_noise=self.add_noise, + dropout=self.dropout)(X) + self.encoder = encoder + self.decoder = decoder + return autoencoder_estimator - @property - def weights_(self): - """Returns weights of the autoencoder's weight layers.""" - weights = [] - for layer in range(len(self.hidden_units)): - weights.append(self.get_tensor_value('encoder/dnn/layer%d/Linear/Matrix:0' % layer)) - for layer in range(len(self.hidden_units)): - weights.append(self.get_tensor_value('decoder/dnn/layer%d/Linear/Matrix:0' % layer)) - weights.append(self.get_tensor_value('linear_regression/weights:0')) - return weights + def generate(self, hidden=None): + """Generate new data using trained construction layer.""" + if hidden is None: + last_layer = len(self.hidden_units) - 1 + bias = self.get_tensor_value( + "encoder/dnn/layer%d/Linear/Bias:0" % last_layer) + hidden = np.random.normal(size=bias.shape) + hidden = np.reshape(hidden, (1, len(hidden))) + return self._session.run(self.decoder, feed_dict={self.encoder: hidden}) - @property - def bias_(self): - """Returns bias of the autoencoder's bias layers.""" - biases = [] - for layer in range(len(self.hidden_units)): - biases.append(self.get_tensor_value('encoder/dnn/layer%d/Linear/Bias:0' % layer)) - for layer in range(len(self.hidden_units)): - biases.append(self.get_tensor_value('decoder/dnn/layer%d/Linear/Bias:0' % layer)) - biases.append(self.get_tensor_value('linear_regression/bias:0')) - return biases + @property + def weights_(self): + """Returns weights of the autoencoder's weight layers.""" + weights = [] + for layer in range(len(self.hidden_units)): + weights.append(self.get_tensor_value( + "encoder/dnn/layer%d/Linear/Matrix:0" % layer)) + for layer in range(len(self.hidden_units)): + weights.append(self.get_tensor_value( + "decoder/dnn/layer%d/Linear/Matrix:0" % layer)) + weights.append(self.get_tensor_value("linear_regression/weights:0")) + return weights + + @property + def bias_(self): + 
"""Returns bias of the autoencoder's bias layers.""" + biases = [] + for layer in range(len(self.hidden_units)): + biases.append(self.get_tensor_value( + "encoder/dnn/layer%d/Linear/Bias:0" % layer)) + for layer in range(len(self.hidden_units)): + biases.append(self.get_tensor_value( + "decoder/dnn/layer%d/Linear/Bias:0" % layer)) + biases.append(self.get_tensor_value("linear_regression/bias:0")) + return biases diff --git a/tensorflow/contrib/learn/python/learn/estimators/base.py b/tensorflow/contrib/learn/python/learn/estimators/base.py index 39131f059b0..ab00ae76f78 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/base.py +++ b/tensorflow/contrib/learn/python/learn/estimators/base.py @@ -1,5 +1,4 @@ -"""Base estimator class.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,18 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Base estimator class.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -import datetime import json import os -import shutil from six import string_types -import numpy as np - from google.protobuf import text_format from tensorflow.python.platform import gfile diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index 017667699bc..5447d9ec052 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -1,5 +1,4 @@ -"""Deep Neural Network estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Deep Neural Network estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index d58ab35f5ee..ef73c44013a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -1,5 +1,4 @@ -"""Linear Estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +"""Linear Estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn.py b/tensorflow/contrib/learn/python/learn/estimators/rnn.py index b703f607657..719a19a5bc8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/rnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/rnn.py @@ -1,5 +1,4 @@ -"""Recurrent Neural Network estimators.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""Recurrent Neural Network estimators.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/io/data_feeder.py b/tensorflow/contrib/learn/python/learn/io/data_feeder.py index 04bbd997482..b3ed3bc7d92 100644 --- a/tensorflow/contrib/learn/python/learn/io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/io/data_feeder.py @@ -1,6 +1,4 @@ -"""Implementations of different data feeders to provide data for TF trainer.""" - -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Implementations of different data feeders to provide data for TF trainer.""" + # TODO(ipolosukhin): Replace this module with feed-dict queue runners & queues. from __future__ import absolute_import diff --git a/tensorflow/contrib/learn/python/learn/models.py b/tensorflow/contrib/learn/python/learn/models.py index 8cabd390fc7..dddd152f368 100644 --- a/tensorflow/contrib/learn/python/learn/models.py +++ b/tensorflow/contrib/learn/python/learn/models.py @@ -1,5 +1,4 @@ -"""Various high level TF models.""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +"""Various high level TF models.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.learn.python.learn.ops import autoencoder_ops from tensorflow.contrib.learn.python.learn.ops import dnn_ops from tensorflow.contrib.learn.python.learn.ops import losses_ops -from tensorflow.contrib.learn.python.learn.ops import autoencoder_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops as array_ops_ @@ -29,8 +31,7 @@ from tensorflow.python.ops import variable_scope as vs def linear_regression_zero_init(X, y): - """Creates a linear regression TensorFlow subgraph, in which weights and - bias terms are initialized to exactly zero. + """Linear regression subgraph with zero-value initial weights and bias. Args: X: tensor or placeholder for input features. @@ -43,8 +44,7 @@ def linear_regression_zero_init(X, y): def logistic_regression_zero_init(X, y): - """Creates a logistic regression TensorFlow subgraph, in which weights and - bias terms are initialized to exactly zero. + """Logistic regression subgraph with zero-value initial weights and bias. Args: X: tensor or placeholder for input features. @@ -85,7 +85,7 @@ def linear_regression(X, y, init_mean=None, init_stddev=1.0): else: output_shape = y_shape[1] # Set up the requested initialization. - if (init_mean is None): + if init_mean is None: weights = vs.get_variable('weights', [X.get_shape()[1], output_shape]) bias = vs.get_variable('bias', [output_shape]) else: @@ -134,7 +134,7 @@ def logistic_regression(X, logging_ops.histogram_summary('logistic_regression.X', X) logging_ops.histogram_summary('logistic_regression.y', y) # Set up the requested initialization. - if (init_mean is None): + if init_mean is None: weights = vs.get_variable('weights', [X.get_shape()[1], y.get_shape()[-1]]) bias = vs.get_variable('bias', [y.get_shape()[-1]]) @@ -188,35 +188,37 @@ def get_dnn_model(hidden_units, target_predictor_fn, dropout=None): return dnn_estimator + def get_autoencoder_model(hidden_units, target_predictor_fn, activation, add_noise=None, dropout=None): - """Returns a function that creates a Autoencoder TensorFlow subgraph with given - params. + """Returns a function that creates a Autoencoder TensorFlow subgraph. - Args: - hidden_units: List of values of hidden units for layers. - target_predictor_fn: Function that will predict target from input - features. This can be logistic regression, - linear regression or any other model, - that takes X, y and returns predictions and loss tensors. - activation: activation function used to map inner latent layer onto - reconstruction layer. - add_noise: a function that adds noise to tensor_in, - e.g. def add_noise(x): - return(x + np.random.normal(0, 0.1, (len(x), len(x[0])))) - dropout: When not none, causes dropout regularization to be used, - with the specified probability of removing a given coordinate. + Args: + hidden_units: List of values of hidden units for layers. + target_predictor_fn: Function that will predict target from input + features. This can be logistic regression, + linear regression or any other model, + that takes X, y and returns predictions and loss + tensors. + activation: activation function used to map inner latent layer onto + reconstruction layer. + add_noise: a function that adds noise to tensor_in, + e.g. 
def add_noise(x): + return(x + np.random.normal(0, 0.1, (len(x), len(x[0])))) + dropout: When not none, causes dropout regularization to be used, + with the specified probability of removing a given coordinate. + + Returns: + A function that creates the subgraph. + """ + def dnn_autoencoder_estimator(X): + """Autoencoder estimator with target predictor function on top.""" + encoder, decoder = autoencoder_ops.dnn_autoencoder( + X, hidden_units, activation, + add_noise=add_noise, dropout=dropout) + return encoder, decoder, target_predictor_fn(X, decoder) + return dnn_autoencoder_estimator - Returns: - A function that creates the subgraph. - """ - def dnn_autoencoder_estimator(X): - """Autoencoder estimator with target predictor function on top.""" - encoder, decoder = autoencoder_ops.dnn_autoencoder( - X, hidden_units, activation, - add_noise=add_noise, dropout=dropout) - return encoder, decoder, target_predictor_fn(X, decoder) - return dnn_autoencoder_estimator ## This will be in Tensorflow 0.7. ## TODO(ilblackdragon): Clean this up when it's released diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 861db1758f5..79c629d9491 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -1,5 +1,4 @@ -"""Monitors to track model training, report on progress and request early stopping""" -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +"""Monitors to track training, report progress and request early stopping.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/tensorflow/examples/skflow/boston.py b/tensorflow/examples/skflow/boston.py index bf2066770c7..9d895bd8e38 100644 --- a/tensorflow/examples/skflow/boston.py +++ b/tensorflow/examples/skflow/boston.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/iris.py b/tensorflow/examples/skflow/iris.py index c6c566b10fd..ea44428d541 100644 --- a/tensorflow/examples/skflow/iris.py +++ b/tensorflow/examples/skflow/iris.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/iris_custom_decay_dnn.py b/tensorflow/examples/skflow/iris_custom_decay_dnn.py index f9c172725d9..b8b1a1dd140 100644 --- a/tensorflow/examples/skflow/iris_custom_decay_dnn.py +++ b/tensorflow/examples/skflow/iris_custom_decay_dnn.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/mnist.py b/tensorflow/examples/skflow/mnist.py index 082ecb2f839..d1288a31e98 100644 --- a/tensorflow/examples/skflow/mnist.py +++ b/tensorflow/examples/skflow/mnist.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tensorflow/examples/skflow/resnet.py b/tensorflow/examples/skflow/resnet.py index f1f39568d46..03a5d5e5191 100644 --- a/tensorflow/examples/skflow/resnet.py +++ b/tensorflow/examples/skflow/resnet.py @@ -1,4 +1,4 @@ -# Copyright 2015-present The Scikit Flow Authors. All Rights Reserved. +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,147 +12,155 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -This example builds deep residual network for mnist data. +"""This example builds deep residual network for mnist data. + Reference Paper: http://arxiv.org/pdf/1512.03385.pdf Note that this is still a work-in-progress. Feel free to submit a PR to make this better. """ + from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os from collections import namedtuple from math import sqrt +import os from sklearn import metrics import tensorflow as tf -from tensorflow.examples.tutorials.mnist import input_data from tensorflow.contrib import learn +from tensorflow.examples.tutorials.mnist import input_data def res_net(x, y, activation=tf.nn.relu): - """Builds a residual network. Note that if the input tensor is 2D, it must be - square in order to be converted to a 4D tensor. + """Builds a residual network. - Borrowed structure from here: https://github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py + Note that if the input tensor is 2D, it must be square in order to be + converted to a 4D tensor. - Args: - x: Input of the network - y: Output of the network - activation: Activation function to apply after each convolution - """ + Borrowed structure from: + github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py - # Configurations for each bottleneck block - BottleneckBlock = namedtuple( - 'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size']) - blocks = [BottleneckBlock(3, 128, 32), - BottleneckBlock(3, 256, 64), - BottleneckBlock(3, 512, 128), - BottleneckBlock(3, 1024, 256)] + Args: + x: Input of the network + y: Output of the network + activation: Activation function to apply after each convolution - input_shape = x.get_shape().as_list() + Returns: + Predictions and loss tensors. + """ - # Reshape the input into the right shape if it's 2D tensor - if len(input_shape) == 2: - ndim = int(sqrt(input_shape[1])) - x = tf.reshape(x, [-1, ndim, ndim, 1]) + # Configurations for each bottleneck block. 
+ BottleneckBlock = namedtuple( + 'BottleneckBlock', ['num_layers', 'num_filters', 'bottleneck_size']) + blocks = [BottleneckBlock(3, 128, 32), + BottleneckBlock(3, 256, 64), + BottleneckBlock(3, 512, 128), + BottleneckBlock(3, 1024, 256)] - # First convolution expands to 64 channels - with tf.variable_scope('conv_layer1'): - net = learn.ops.conv2d(x, 64, [7, 7], batch_norm=True, - activation=activation, bias=False) + input_shape = x.get_shape().as_list() - # Max pool - net = tf.nn.max_pool( - net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') + # Reshape the input into the right shape if it's 2D tensor + if len(input_shape) == 2: + ndim = int(sqrt(input_shape[1])) + x = tf.reshape(x, [-1, ndim, ndim, 1]) - # First chain of resnets - with tf.variable_scope('conv_layer2'): - net = learn.ops.conv2d(net, blocks[0].num_filters, - [1, 1], [1, 1, 1, 1], - padding='VALID', bias=True) + # First convolution expands to 64 channels + with tf.variable_scope('conv_layer1'): + net = learn.ops.conv2d(x, 64, [7, 7], batch_norm=True, + activation=activation, bias=False) - # Create each bottleneck building block for each layer - for block_i, block in enumerate(blocks): - for layer_i in range(block.num_layers): + # Max pool + net = tf.nn.max_pool( + net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME') - name = 'block_%d/layer_%d' % (block_i, layer_i) + # First chain of resnets + with tf.variable_scope('conv_layer2'): + net = learn.ops.conv2d(net, blocks[0].num_filters, + [1, 1], [1, 1, 1, 1], + padding='VALID', bias=True) - # 1x1 convolution responsible for reducing dimension - with tf.variable_scope(name + '/conv_in'): - conv = learn.ops.conv2d(net, block.bottleneck_size, - [1, 1], [1, 1, 1, 1], - padding='VALID', - activation=activation, - batch_norm=True, - bias=False) + # Create each bottleneck building block for each layer + for block_i, block in enumerate(blocks): + for layer_i in range(block.num_layers): - with tf.variable_scope(name + '/conv_bottleneck'): - conv = learn.ops.conv2d(conv, block.bottleneck_size, - [3, 3], [1, 1, 1, 1], - padding='SAME', - activation=activation, - batch_norm=True, - bias=False) + name = 'block_%d/layer_%d' % (block_i, layer_i) - # 1x1 convolution responsible for restoring dimension - with tf.variable_scope(name + '/conv_out'): - conv = learn.ops.conv2d(conv, block.num_filters, - [1, 1], [1, 1, 1, 1], - padding='VALID', - activation=activation, - batch_norm=True, - bias=False) + # 1x1 convolution responsible for reducing dimension + with tf.variable_scope(name + '/conv_in'): + conv = learn.ops.conv2d(net, block.bottleneck_size, + [1, 1], [1, 1, 1, 1], + padding='VALID', + activation=activation, + batch_norm=True, + bias=False) - # shortcut connections that turn the network into its counterpart - # residual function (identity shortcut) - net = conv + net + with tf.variable_scope(name + '/conv_bottleneck'): + conv = learn.ops.conv2d(conv, block.bottleneck_size, + [3, 3], [1, 1, 1, 1], + padding='SAME', + activation=activation, + batch_norm=True, + bias=False) - try: - # upscale to the next block size - next_block = blocks[block_i + 1] - with tf.variable_scope('block_%d/conv_upscale' % block_i): - net = learn.ops.conv2d(net, next_block.num_filters, - [1, 1], [1, 1, 1, 1], - bias=False, - padding='SAME') - except IndexError: - pass + # 1x1 convolution responsible for restoring dimension + with tf.variable_scope(name + '/conv_out'): + conv = learn.ops.conv2d(conv, block.num_filters, + [1, 1], [1, 1, 1, 1], + padding='VALID', + activation=activation, + 
batch_norm=True, + bias=False) - net_shape = net.get_shape().as_list() - net = tf.nn.avg_pool(net, - ksize=[1, net_shape[1], net_shape[2], 1], - strides=[1, 1, 1, 1], padding='VALID') + # shortcut connections that turn the network into its counterpart + # residual function (identity shortcut) + net = conv + net - net_shape = net.get_shape().as_list() - net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]]) + try: + # upscale to the next block size + next_block = blocks[block_i + 1] + with tf.variable_scope('block_%d/conv_upscale' % block_i): + net = learn.ops.conv2d(net, next_block.num_filters, + [1, 1], [1, 1, 1, 1], + bias=False, + padding='SAME') + except IndexError: + pass - return learn.models.logistic_regression(net, y) + net_shape = net.get_shape().as_list() + net = tf.nn.avg_pool(net, + ksize=[1, net_shape[1], net_shape[2], 1], + strides=[1, 1, 1, 1], padding='VALID') + + net_shape = net.get_shape().as_list() + net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]]) + + return learn.models.logistic_regression(net, y) # Download and load MNIST data. mnist = input_data.read_data_sets('MNIST_data') # Restore model if graph is saved into a folder. -if os.path.exists("models/resnet/graph.pbtxt"): - classifier = learn.TensorFlowEstimator.restore("models/resnet/") +if os.path.exists('models/resnet/graph.pbtxt'): + classifier = learn.TensorFlowEstimator.restore('models/resnet/') else: - # Create a new resnet classifier. - classifier = learn.TensorFlowEstimator( - model_fn=res_net, n_classes=10, batch_size=100, steps=100, - learning_rate=0.001, continue_training=True) + # Create a new resnet classifier. + classifier = learn.TensorFlowEstimator( + model_fn=res_net, n_classes=10, batch_size=100, steps=100, + learning_rate=0.001, continue_training=True) while True: - # Train model and save summaries into logdir. - classifier.fit(mnist.train.images, mnist.train.labels, logdir="models/resnet/") + # Train model and save summaries into logdir. + classifier.fit( + mnist.train.images, mnist.train.labels, logdir='models/resnet/') - # Calculate accuracy. - score = metrics.accuracy_score( - mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64)) - print('Accuracy: {0:f}'.format(score)) + # Calculate accuracy. + score = metrics.accuracy_score( + mnist.test.labels, classifier.predict(mnist.test.images, batch_size=64)) + print('Accuracy: {0:f}'.format(score)) - # Save model graph and checkpoints. - classifier.save("models/resnet/") + # Save model graph and checkpoints. + classifier.save('models/resnet/') From 22791b144c9727de2bc14ff0326918caa28b64ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 13:13:37 -0800 Subject: [PATCH 12/20] Don't load events until all runs have been added. This means that the runs will show up in TensorBoard even though they won't have events yet (since we still only load from one event at a time). This requires us to disable the activation checking logic in EventAccumulator. 
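After this change the accessors no longer gate on activation: an
EventAccumulator can be queried as soon as it is constructed, and
Reload() simply pulls in whatever has been written so far. A minimal
sketch of the new contract (the logdir path and the 'loss' tag below
are illustrative, not part of this change):

    from tensorflow.python.summary import event_accumulator as ea

    acc = ea.EventAccumulator('/tmp/logdir/run1')  # illustrative path
    print(acc.Tags())  # Safe before Reload(); previously raised RuntimeError.
    acc.Reload()       # Synchronously loads all events written so far.
    for event in acc.Scalars('loss'):  # 'loss' is an assumed tag name.
      print(event.step, event.value)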
Change: 123252077 --- .../python/summary/event_accumulator.py | 28 +------------------ .../python/summary/event_accumulator_test.py | 12 -------- .../python/summary/event_multiplexer.py | 10 +------ tensorflow/tensorboard/backend/server.py | 7 ++--- 4 files changed, 4 insertions(+), 53 deletions(-) diff --git a/tensorflow/python/summary/event_accumulator.py b/tensorflow/python/summary/event_accumulator.py index 2ee8a369f76..204ed009129 100644 --- a/tensorflow/python/summary/event_accumulator.py +++ b/tensorflow/python/summary/event_accumulator.py @@ -114,8 +114,7 @@ class EventAccumulator(object): `Accumulator.Scalars(tag)`) allow for the retrieval of all data associated with that tag. - Before usage, the `EventAccumulator` must be activated via `Reload()`. This - method synchronosly loads all of the data written so far. + The `Reload()` method synchronously loads all of the data written so far. Histograms, audio, and images are very large, so storing all of them is not recommended. @@ -175,7 +174,6 @@ class EventAccumulator(object): self._compression_bps = compression_bps self.purge_orphaned_data = purge_orphaned_data - self._activated = False self.most_recent_step = -1 self.most_recent_wall_time = -1 self.file_version = None @@ -188,12 +186,10 @@ class EventAccumulator(object): """Loads all events added since the last call to `Reload`. If `Reload` was never called, loads all events in the file. - Calling `Reload` activates the `EventAccumulator`. Returns: The `EventAccumulator`. """ - self._activated = True with self._generator_mutex: for event in self._generator.Load(): if event.HasField('file_version'): @@ -232,13 +228,9 @@ class EventAccumulator(object): def Tags(self): """Return all tags found in the value stream. - Raises: - RuntimeError: If the `EventAccumulator` has not been activated. - Returns: A `{tagType: ['list', 'of', 'tags']}` dictionary. """ - self._VerifyActivated() return {IMAGES: self._images.Keys(), AUDIO: self._audio.Keys(), HISTOGRAMS: self._histograms.Keys(), @@ -255,12 +247,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `ScalarEvent`s. """ - self._VerifyActivated() return self._scalars.Items(tag) def Graph(self): @@ -268,12 +258,10 @@ class EventAccumulator(object): Raises: ValueError: If there is no graph for this run. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: The `graph_def` proto. """ - self._VerifyActivated() if self._graph is None: raise ValueError('There is no graph in this EventAccumulator') graph = graph_pb2.GraphDef() @@ -288,12 +276,10 @@ class EventAccumulator(object): Raises: ValueError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: The metadata in form of `RunMetadata` proto. """ - self._VerifyActivated() if tag not in self._tagged_metadata: raise ValueError('There is no run metadata with this tag name') @@ -309,12 +295,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `HistogramEvent`s. """ - self._VerifyActivated() return self._histograms.Items(tag) def CompressedHistograms(self, tag): @@ -325,12 +309,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `CompressedHistogramEvent`s. 
""" - self._VerifyActivated() return self._compressed_histograms.Items(tag) def Images(self, tag): @@ -341,12 +323,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `ImageEvent`s. """ - self._VerifyActivated() return self._images.Items(tag) def Audio(self, tag): @@ -357,12 +337,10 @@ class EventAccumulator(object): Raises: KeyError: If the tag is not found. - RuntimeError: If the `EventAccumulator` has not been activated. Returns: An array of `AudioEvent`s. """ - self._VerifyActivated() return self._audio.Items(tag) def _MaybePurgeOrphanedData(self, event): @@ -599,10 +577,6 @@ class EventAccumulator(object): event.wall_time, *expired_per_type) logging.warn(purge_msg) - def _VerifyActivated(self): - if not self._activated: - raise RuntimeError('Accumulator must be activated before it may be used.') - def _GetPurgeMessage(most_recent_step, most_recent_wall_time, event_step, event_wall_time, num_expired_scalars, num_expired_histos, diff --git a/tensorflow/python/summary/event_accumulator_test.py b/tensorflow/python/summary/event_accumulator_test.py index f6b60b91db9..b154d853322 100644 --- a/tensorflow/python/summary/event_accumulator_test.py +++ b/tensorflow/python/summary/event_accumulator_test.py @@ -456,18 +456,6 @@ class MockingEventAccumulatorTest(EventAccumulatorTest): self.assertEqual(acc.Audio('snd1'), [snd1]) self.assertEqual(acc.Audio('snd2'), [snd2]) - def testActivation(self): - gen = _EventGenerator() - acc = ea.EventAccumulator(gen) - self.assertFalse(acc._activated) - with self.assertRaises(RuntimeError): - acc.Tags() - with self.assertRaises(RuntimeError): - acc.Scalars('s1') - acc.Reload() - self.assertTrue(acc._activated) - acc._activated = False - def testKeyError(self): gen = _EventGenerator() acc = ea.EventAccumulator(gen) diff --git a/tensorflow/python/summary/event_multiplexer.py b/tensorflow/python/summary/event_multiplexer.py index a0f4ef402f3..00eab3d215d 100644 --- a/tensorflow/python/summary/event_multiplexer.py +++ b/tensorflow/python/summary/event_multiplexer.py @@ -113,8 +113,7 @@ class EventMultiplexer(object): accumulator. If `Reload` has been called, it will `Reload` the newly created - accumulators. This maintains the invariant that once the Multiplexer was - activated, all of its accumulators are active. + accumulators. Args: path: Path to the event files (or event directory) for given run. @@ -199,7 +198,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's `EventAccumulator` has not been activated. Returns: An array of `event_accumulator.ScalarEvents`. @@ -216,7 +214,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found. ValueError: If the run does not have an associated graph. - RuntimeError: If the run's EventAccumulator has not been activated. Returns: The `graph_def` protobuf data structure. @@ -234,7 +231,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's EventAccumulator has not been activated. Returns: The metadata in the form of `RunMetadata` protobuf data structure. @@ -252,7 +248,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's `EventAccumulator` has not been activated. 
Returns: An array of `event_accumulator.HistogramEvents`. @@ -270,7 +265,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's EventAccumulator has not been activated. Returns: An array of `event_accumulator.CompressedHistogramEvents`. @@ -288,7 +282,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's `EventAccumulator` has not been activated. Returns: An array of `event_accumulator.ImageEvents`. @@ -306,7 +299,6 @@ class EventMultiplexer(object): Raises: KeyError: If the run is not found, or the tag is not available for the given run. - RuntimeError: If the run's `EventAccumulator` has not been activated. Returns: An array of `event_accumulator.AudioEvents`. diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py index cfdd6c56543..b025a2f5b9f 100644 --- a/tensorflow/tensorboard/backend/server.py +++ b/tensorflow/tensorboard/backend/server.py @@ -120,12 +120,9 @@ def StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval): Returns: A started `threading.Thread` that reloads the multiplexer. - """ - # Ensure the Multiplexer initializes in a loaded state before it adds runs - # So it can handle HTTP requests while runs are loading - multiplexer.Reload() - + # We don't call multiplexer.Reload() here because that would make + # AddRunsFromDirectory block until the runs have all loaded. for path in path_to_run.keys(): if gcs.IsGCSPath(path): gcs.CheckIsSupported() From 97222e42933a0546289d90de3c494c127e17ee9e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 25 May 2016 13:23:43 -0800 Subject: [PATCH 13/20] Allowing alternate implementations of the ffmpeg_lib library. 
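As part of this, CreateAudioFile() gains a bits_per_second argument and
the EncodeAudio op gains a matching attr with a default of 192000
(ignored for the "wav" format), so existing callers keep working. A
rough sketch of exercising the op through the contrib Python wrapper
(the wrapper call and the sine-wave input are illustrative assumptions,
not part of this change):

    import numpy as np
    import tensorflow as tf
    from tensorflow.contrib import ffmpeg

    # One second of a 440 Hz sine wave at 20 kHz, shaped [samples, channels].
    samples = np.sin(2 * np.pi * 440.0 * np.arange(20000) / 20000.0)
    waveform = tf.constant(samples.reshape(-1, 1), dtype=tf.float32)
    encoded = ffmpeg.encode_audio(
        waveform, file_format='wav', samples_per_second=20000)
    with tf.Session() as sess:
      wav_bytes = sess.run(encoded)  # Binary contents of the wav file.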
Change: 123253228 --- tensorflow/contrib/ffmpeg/BUILD | 2 ++ tensorflow/contrib/ffmpeg/decode_audio_op.cc | 2 +- tensorflow/contrib/ffmpeg/default/BUILD | 5 ++++- tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc | 8 ++++---- .../contrib/ffmpeg/default/ffmpeg_lib_test.cc | 6 +++--- tensorflow/contrib/ffmpeg/encode_audio_op.cc | 14 ++++++++++---- .../contrib/ffmpeg/{default => }/ffmpeg_lib.h | 11 ++++++----- .../tools/ci_build/builds/integration_tests.sh | 4 ++-- 8 files changed, 32 insertions(+), 20 deletions(-) rename tensorflow/contrib/ffmpeg/{default => }/ffmpeg_lib.h (83%) diff --git a/tensorflow/contrib/ffmpeg/BUILD b/tensorflow/contrib/ffmpeg/BUILD index 75d58ccf23b..268d7bea369 100644 --- a/tensorflow/contrib/ffmpeg/BUILD +++ b/tensorflow/contrib/ffmpeg/BUILD @@ -17,6 +17,8 @@ filegroup( srcs = glob(["testdata/*"]), ) +exports_files(["ffmpeg_lib.h"]) + cc_library( name = "decode_audio_op_cc", srcs = ["decode_audio_op.cc"], diff --git a/tensorflow/contrib/ffmpeg/decode_audio_op.cc b/tensorflow/contrib/ffmpeg/decode_audio_op.cc index b38b9957a84..a2ecc7f287e 100644 --- a/tensorflow/contrib/ffmpeg/decode_audio_op.cc +++ b/tensorflow/contrib/ffmpeg/decode_audio_op.cc @@ -18,7 +18,7 @@ #include #include -#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h" +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/io/path.h" diff --git a/tensorflow/contrib/ffmpeg/default/BUILD b/tensorflow/contrib/ffmpeg/default/BUILD index f8566df6730..e1b7bb61924 100644 --- a/tensorflow/contrib/ffmpeg/default/BUILD +++ b/tensorflow/contrib/ffmpeg/default/BUILD @@ -11,7 +11,10 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) cc_library( name = "ffmpeg_lib", srcs = ["ffmpeg_lib.cc"], - hdrs = ["ffmpeg_lib.h"], + hdrs = [ + # Header is shared between implementations. + "//tensorflow/contrib/ffmpeg:ffmpeg_lib.h", + ], deps = [ "//google/protobuf", "//tensorflow/core:framework_headers_lib", diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 629072ed7e1..8a7b6840f67 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -13,7 +13,7 @@ // limitations under the License. // ============================================================================= -#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h" +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" #include #include @@ -212,9 +212,9 @@ Status ReadAudioFile(const string& filename, } } -Status CreateAudioFile(const string& audio_format_id, int32 samples_per_second, - int32 channel_count, const std::vector& samples, - string* output_data) { +Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, + int32 samples_per_second, int32 channel_count, + const std::vector& samples, string* output_data) { if (audio_format_id != "wav") { return Status(error::Code::INVALID_ARGUMENT, "CreateAudioFile only supports the 'wav' audio format."); diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc index 9001341e641..ec0b19f961a 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib_test.cc @@ -13,7 +13,7 @@ // limitations under the License. 
// ============================================================================= -#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h" +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" #include #include @@ -91,7 +91,7 @@ TEST(FfmpegLibTest, TestRoundTripGeneratedWav) { sine_wave.push_back(std::sin(6.28 * 440.0 * i / 20000.0)); } string content; - ASSERT_TRUE(CreateAudioFile("wav", 20000, 1, sine_wave, &content).ok()); + ASSERT_TRUE(CreateAudioFile("wav", 0, 20000, 1, sine_wave, &content).ok()); string temp_filename = GetTempFilename("wav"); ASSERT_TRUE(WriteStringToFile(Env::Default(), temp_filename, content).ok()); std::vector roundtrip_data; @@ -122,7 +122,7 @@ TEST(FfmpegLibTest, TestRoundTripWav) { string written_audio; ASSERT_TRUE( - CreateAudioFile("wav", 10000, 1, output_samples, &written_audio).ok()); + CreateAudioFile("wav", 0, 10000, 1, output_samples, &written_audio).ok()); EXPECT_EQ(original_audio, written_audio); } diff --git a/tensorflow/contrib/ffmpeg/encode_audio_op.cc b/tensorflow/contrib/ffmpeg/encode_audio_op.cc index 0997c0458db..46fcbc75d74 100644 --- a/tensorflow/contrib/ffmpeg/encode_audio_op.cc +++ b/tensorflow/contrib/ffmpeg/encode_audio_op.cc @@ -15,7 +15,7 @@ #include -#include "tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h" +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -35,6 +35,8 @@ class EncodeAudioOp : public OpKernel { context, context->GetAttr("samples_per_second", &samples_per_second_)); OP_REQUIRES(context, samples_per_second_ > 0, errors::InvalidArgument("samples_per_second must be > 0.")); + OP_REQUIRES_OK( + context, context->GetAttr("bits_per_second", &bits_per_second_)); } void Compute(OpKernelContext* context) override { @@ -61,9 +63,9 @@ class EncodeAudioOp : public OpKernel { } const int32 channel_count = contents.dim_size(1); string encoded_audio; - OP_REQUIRES_OK(context, - CreateAudioFile(file_format_, samples_per_second_, - channel_count, samples, &encoded_audio)); + OP_REQUIRES_OK(context, CreateAudioFile(file_format_, bits_per_second_, + samples_per_second_, channel_count, + samples, &encoded_audio)); // Copy the encoded audio file to the output tensor. Tensor* output = nullptr; @@ -75,6 +77,7 @@ class EncodeAudioOp : public OpKernel { private: string file_format_; int32 samples_per_second_; + int32 bits_per_second_; }; REGISTER_KERNEL_BUILDER(Name("EncodeAudio").Device(DEVICE_CPU), EncodeAudioOp); @@ -84,6 +87,7 @@ REGISTER_OP("EncodeAudio") .Output("contents: string") .Attr("file_format: string") .Attr("samples_per_second: int") + .Attr("bits_per_second: int = 192000") .Doc(R"doc( Processes a `Tensor` containing sampled audio with the number of channels and length of the audio specified by the dimensions of the `Tensor`. The @@ -100,6 +104,8 @@ sampled_audio: A rank 2 tensor containing all tracks of the audio. Dimension 0 contents: The binary audio file contents. file_format: A string describing the audio file format. This must be "wav". samples_per_second: The number of samples per second that the audio should have. +bits_per_second: The approximate bitrate of the encoded audio file. This is + ignored by the "wav" file format. 
)doc"); } // namespace ffmpeg diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h similarity index 83% rename from tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h rename to tensorflow/contrib/ffmpeg/ffmpeg_lib.h index d7b8f957de5..46b42c14334 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.h +++ b/tensorflow/contrib/ffmpeg/ffmpeg_lib.h @@ -13,10 +13,11 @@ // limitations under the License. // ============================================================================= -#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_DEFAULT_FFMPEG_LIB_H_ -#define THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_DEFAULT_FFMPEG_LIB_H_ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_FFMPEG_LIB_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_FFMPEG_FFMPEG_LIB_H_ #include +#include #include "tensorflow/core/lib/core/status.h" @@ -40,9 +41,9 @@ Status ReadAudioFile(const string& filename, // contain a separate sample for each channel. Frames are ordered by time. // Currently, the implementation only supports wav files, and ffmpeg is not used // to create them. -Status CreateAudioFile(const string& audio_format_id, int32 samples_per_second, - int32 channel_count, const std::vector& samples, - string* output_data); +Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second, + int32 samples_per_second, int32 channel_count, + const std::vector& samples, string* output_data); } // namespace ffmpeg } // namespace tensorflow diff --git a/tensorflow/tools/ci_build/builds/integration_tests.sh b/tensorflow/tools/ci_build/builds/integration_tests.sh index a9a3de99936..24b4b45fec4 100755 --- a/tensorflow/tools/ci_build/builds/integration_tests.sh +++ b/tensorflow/tools/ci_build/builds/integration_tests.sh @@ -108,10 +108,10 @@ mkdir -p "${TEST_DIR}" || \ test_ffmpeg_lib() { # If FFmpeg is not installed then run a test that assumes it is not installed. if [[ -z "$(which ffmpeg)" ]]; then - bazel test tensorflow/contrib/ffmpeg/kernels:ffmpeg_lib_uninstalled_test + bazel test tensorflow/contrib/ffmpeg/default:ffmpeg_lib_uninstalled_test return $? else - bazel test tensorflow/contrib/ffmpeg/kernels:ffmpeg_lib_installed_test \ + bazel test tensorflow/contrib/ffmpeg/default:ffmpeg_lib_installed_test \ tensorflow/contrib/ffmpeg:decode_audio_op_test return $? fi From acac487ac4ebaa6edb3e3f866d41cbd12546a107 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 25 May 2016 13:39:41 -0800 Subject: [PATCH 14/20] Makes `tf.Graph()` thread-safe for a single writer and multiple readers. Adds a warning that is raised when multiple threads attempt to mutate the graph, which would result in undefined behavior. Supporting multiple *writers* would require a larger rewrite, which is deferred for now. Fixes #2425. Change: 123255083 --- tensorflow/python/framework/ops.py | 145 ++++++++++++++++------------- 1 file changed, 82 insertions(+), 63 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 37178746933..9f28ad8b64e 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -1916,10 +1916,18 @@ class Graph(object): def __init__(self): """Creates a new, empty Graph.""" - self._nodes_by_id = dict() - self._next_node_id = [dict()] - self._next_id_counter = 0 - self._nodes_by_name = dict() + # Protects the core state that may be accessed by multiple readers. 
+ # Only state that can be returned via public accessors (`as_graph_def()`, + # `get_operations()`, `as_graph_element()`, `get_collection()`, and + # `get_collection_ref()`) is by the lock. Thread-safety is provided on a + # best-effort basis to support buggy programs, and is not guaranteed by the + # public `tf.Graph` API. + # NOTE(mrry): This does not protect the various stacks. A warning will + # be reported if these are used from multiple threads + self._lock = threading.Lock() + self._nodes_by_id = dict() # GUARDED_BY(self._lock) + self._next_id_counter = 0 # GUARDED_BY(self._lock) + self._nodes_by_name = dict() # GUARDED_BY(self._lock) # Current name stack: uniquified names self._name_stack = "" # Maps a name used in the graph to the next id to use for that name. @@ -1987,15 +1995,15 @@ class Graph(object): self._check_not_finalized() if not isinstance(op, (Tensor, Operation)): raise TypeError("op must be a Tensor or Operation: %s" % op) - - if op._id in self._nodes_by_id: - raise ValueError("cannot add an op with id %d as it already " - "exists in the graph" % op._id) - if op.name in self._nodes_by_name: - raise ValueError("cannot add op with name %s as that name " - "is already used" % op.name) - self._nodes_by_id[op._id] = op - self._nodes_by_name[op.name] = op + with self._lock: + if op._id in self._nodes_by_id: + raise ValueError("cannot add an op with id %d as it already " + "exists in the graph" % op._id) + if op.name in self._nodes_by_name: + raise ValueError("cannot add op with name %s as that name " + "is already used" % op.name) + self._nodes_by_id[op._id] = op + self._nodes_by_name[op.name] = op @property def version(self): @@ -2081,31 +2089,32 @@ class Graph(object): Raises: ValueError: If the `graph_def` would be too large. """ - graph = graph_pb2.GraphDef() - graph.versions.CopyFrom(self._graph_def_versions) - bytesize = 0 - for op_id in sorted(self._nodes_by_id): - op = self._nodes_by_id[op_id] - if from_version is None or op_id > from_version: - graph.node.extend([op.node_def]) - if op.outputs and add_shapes: - assert "_output_shapes" not in graph.node[-1].attr - graph.node[-1].attr["_output_shapes"].list.shape.extend([ - output.get_shape().as_proto() for output in op.outputs]) - bytesize += op.node_def.ByteSize() - if bytesize >= (1 << 31) or bytesize < 0: - raise ValueError("GraphDef cannot be larger than 2GB.") - if self._functions: - for f in self._functions.values(): - bytesize += f.ByteSize() - if bytesize >= (1 << 31) or bytesize < 0: - raise ValueError("GraphDef cannot be larger than 2GB.") - graph.library.function.extend(self._functions.values()) - for func in self._function_gradient: - grad_def = function_pb2.GradientDef() - grad_def.function_name = func - grad_def.gradient_func = self._function_gradient[func] - graph.library.gradient.extend([grad_def]) + with self._lock: + graph = graph_pb2.GraphDef() + graph.versions.CopyFrom(self._graph_def_versions) + bytesize = 0 + for op_id in sorted(self._nodes_by_id): + op = self._nodes_by_id[op_id] + if from_version is None or op_id > from_version: + graph.node.extend([op.node_def]) + if op.outputs and add_shapes: + assert "_output_shapes" not in graph.node[-1].attr + graph.node[-1].attr["_output_shapes"].list.shape.extend([ + output.get_shape().as_proto() for output in op.outputs]) + bytesize += op.node_def.ByteSize() + if bytesize >= (1 << 31) or bytesize < 0: + raise ValueError("GraphDef cannot be larger than 2GB.") + if self._functions: + for f in self._functions.values(): + bytesize += f.ByteSize() + if 
bytesize >= (1 << 31) or bytesize < 0: + raise ValueError("GraphDef cannot be larger than 2GB.") + graph.library.function.extend(self._functions.values()) + for func in self._function_gradient: + grad_def = function_pb2.GradientDef() + grad_def.function_name = func + grad_def.gradient_func = self._function_gradient[func] + graph.library.gradient.extend([grad_def]) return graph @@ -2298,7 +2307,11 @@ class Graph(object): example, an invalid string. KeyError: If `obj` is not an object in the graph. """ + with self._lock: + return self._as_graph_element_locked(obj, allow_tensor, allow_operation) + def _as_graph_element_locked(self, obj, allow_tensor, allow_operation): + """See `Graph.as_graph_element()` for details.""" # The vast majority of this function is figuring # out what an API user might be doing wrong, so # that we can give helpful error messages. @@ -2398,7 +2411,8 @@ class Graph(object): Returns: A list of Operations. """ - return list(self._nodes_by_id.values()) + with self._lock: + return list(self._nodes_by_id.values()) def get_operation_by_name(self, name): """Returns the `Operation` with the given `name`. @@ -2445,8 +2459,9 @@ class Graph(object): def _next_id(self): """Id for next Operation instance. Also increments the internal id.""" self._check_not_finalized() - self._next_id_counter += 1 - return self._next_id_counter + with self._lock: + self._next_id_counter += 1 + return self._next_id_counter @property def _last_id(self): @@ -2499,10 +2514,11 @@ class Graph(object): value: The value to add to the collection. """ self._check_not_finalized() - if name not in self._collections: - self._collections[name] = [value] - else: - self._collections[name].append(value) + with self._lock: + if name not in self._collections: + self._collections[name] = [value] + else: + self._collections[name].append(value) def add_to_collections(self, names, value): """Stores `value` in the collections given by `names`. @@ -2543,11 +2559,12 @@ class Graph(object): The list of values in the collection with the given `name`, or an empty list if no value has been added to that collection. """ - coll_list = self._collections.get(name, None) - if coll_list is None: - coll_list = [] - self._collections[name] = coll_list - return coll_list + with self._lock: + coll_list = self._collections.get(name, None) + if coll_list is None: + coll_list = [] + self._collections[name] = coll_list + return coll_list def get_collection(self, name, scope=None): """Returns a list of values in the collection with the given `name`. @@ -2571,22 +2588,24 @@ class Graph(object): list contains the values in the order under which they were collected. 
""" - coll_list = self._collections.get(name, None) - if coll_list is None: - return [] - if scope is None: - return list(coll_list) - else: - c = [] - regex = re.compile(scope) - for item in coll_list: - if hasattr(item, "name") and regex.match(item.name): - c.append(item) - return c + with self._lock: + coll_list = self._collections.get(name, None) + if coll_list is None: + return [] + if scope is None: + return list(coll_list) + else: + c = [] + regex = re.compile(scope) + for item in coll_list: + if hasattr(item, "name") and regex.match(item.name): + c.append(item) + return c def get_all_collection_keys(self): """Returns a list of collections used in this graph.""" - return [x for x in self._collections if isinstance(x, six.string_types)] + with self._lock: + return [x for x in self._collections if isinstance(x, six.string_types)] @contextlib.contextmanager def _original_op(self, op): From 17f1937035a7715d6f97ffe5a7359e618503fcd4 Mon Sep 17 00:00:00 2001 From: Illia Polosukhin Date: Wed, 25 May 2016 13:40:37 -0800 Subject: [PATCH 15/20] Fix for predict when single-head classification Estimator is used. Change: 123255202 --- .../python/learn/estimators/estimator.py | 10 +++++--- .../python/learn/estimators/estimator_test.py | 24 +++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 0fce7d140f1..1f476e13937 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -563,9 +563,13 @@ class Estimator(BaseEstimator): input_fn=input_fn, batch_size=batch_size) if self._classification: - for key in predictions: - cur_axis = (len(predictions[key].shape) - 1) if axis is None else axis - predictions[key] = np.argmax(predictions[key], axis=cur_axis) + if isinstance(predictions, dict): + for key in predictions: + cur_axis = (len(predictions[key].shape) - 1) if axis is None else axis + predictions[key] = np.argmax(predictions[key], axis=cur_axis) + else: + cur_axis = (len(predictions.shape) - 1) if axis is None else axis + predictions = np.argmax(predictions, axis=cur_axis) return predictions def predict_proba(self, x=None, input_fn=None, batch_size=None): diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py index 40a455c6bf1..b45cf8af168 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py @@ -36,6 +36,17 @@ def boston_input_fn(): return features, target +def iris_input_fn(): + iris = tf.contrib.learn.datasets.load_iris() + features = tf.cast( + tf.reshape( + tf.constant(iris.data), [-1, 4]), tf.float32) + target = tf.cast( + tf.reshape( + tf.constant(iris.target), [-1, 1]), tf.int32) + return features, target + + def boston_eval_fn(): boston = tf.contrib.learn.datasets.load_boston() n_examples = len(boston.target) @@ -52,6 +63,10 @@ def linear_model_fn(features, target, unused_mode): return tf.contrib.learn.models.linear_regression_zero_init(features, target) +def logistic_model_fn(features, target, unused_mode): + return tf.contrib.learn.models.logistic_regression_zero_init(features, target) + + class CheckCallsMonitor(tf.contrib.learn.monitors.BaseMonitor): def __init__(self): @@ -84,6 +99,15 @@ class EstimatorTest(tf.test.TestCase): other_score = 
mean_squared_error(predictions, boston.target)
     self.assertAllClose(other_score, scores['mean_squared_error'])
 
+  def testIrisAll(self):
+    iris = tf.contrib.learn.datasets.load_iris()
+    est = tf.contrib.learn.Estimator(model_fn=logistic_model_fn,
+                                     classification=True)
+    est.train(input_fn=iris_input_fn, steps=100)
+    _ = est.evaluate(input_fn=iris_input_fn, steps=1)
+    predictions = est.predict(x=iris.data)
+    self.assertEqual(predictions.shape[0], iris.target.shape[0])
+
   def testTrainInputFn(self):
     est = tf.contrib.learn.Estimator(model_fn=linear_model_fn,
                                      classification=False)

From 55e8a9211eeaa38d99f4018e104dced96426459f Mon Sep 17 00:00:00 2001
From: Vijay Vasudevan
Date: Wed, 25 May 2016 13:44:36 -0800
Subject: [PATCH 16/20] Reduce dimension size for matmul of complex due to flakiness.

Change: 123255630
---
 tensorflow/python/kernel_tests/matmul_op_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index 0e7f2efe61f..25553097a6c 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -156,14 +156,14 @@ class MatMulTest(tf.test.TestCase):
 
   def testComplex64Random(self):
     for _ in range(10):
-      n, k, m = np.random.randint(1, 100, size=3)
+      n, k, m = np.random.randint(1, 10, size=3)  # Smaller range than float
       x = self._randMatrix(n, k, np.complex64)
       y = self._randMatrix(k, m, np.complex64)
       self._testCpuMatmul(x, y)
 
   def testComplex128Random(self):
     for _ in range(10):
-      n, k, m = np.random.randint(1, 100, size=3)
+      n, k, m = np.random.randint(1, 10, size=3)  # Smaller range than float
       x = self._randMatrix(n, k, np.complex128)
       y = self._randMatrix(k, m, np.complex128)
       self._testCpuMatmul(x, y)

From 6e89233b7427b0797a01b6b77bf3bb5dd5eaa601 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 25 May 2016 14:00:41 -0800
Subject: [PATCH 17/20] Make the gpu_allocator_retry_test *Fail test cases more reliable by having each consumer thread pause for 1 msec between getting its memory and returning it.

Logged failures for the NoRetryFail case suggest that on OS/X thread scheduling is such that without this delay it's possible not to experience a race condition failure within 10 seconds.
Change: 123257296
---
 .../core/common_runtime/gpu/gpu_allocator_retry_test.cc | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc
index c03cb27df50..c911290f28b 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_allocator_retry_test.cc
@@ -81,6 +81,9 @@ class GPUAllocatorRetryTest : public ::testing::Test {
           return;
         }
       }
+      // Failures are more likely to occur if each consumer
+      // delays for a while before returning the memory.
+      Env::Default()->SleepForMicroseconds(500);
       ++consumer_count_[i];
       for (int j = 0; j < cap_needed; ++j) {
         alloc_->DeallocateRaw(ptr);
@@ -141,9 +144,10 @@ TEST_F(GPUAllocatorRetryTest, RetrySuccess) {
   EXPECT_GT(consumer_count_[2], 0);
 }
 
-/* Disabled due to flakiness. b/24738751
 // Verifies OutOfMemory failure when memory is slightly overcommitted
-// and retry is not allowed.
+// and retry is not allowed. Note that this test will fail, i.e. no
+// memory alloc failure will be detected, if it is run in a context that
+// does not permit real multi-threaded execution.
TEST_F(GPUAllocatorRetryTest, NoRetryFail) { // Support up to 2 allocations simultaneously, waits up to 0 msec for // a chance to alloc. @@ -162,7 +166,6 @@ TEST_F(GPUAllocatorRetryTest, NoRetryFail) { EXPECT_TRUE(has_failed_); } } -*/ // Verifies OutOfMemory failure when retry is allowed but memory capacity // is too low even for retry. From 8e9f29598a21d6a409254578f74532dfc080b454 Mon Sep 17 00:00:00 2001 From: "David G. Andersen" Date: Wed, 25 May 2016 14:10:46 -0800 Subject: [PATCH 18/20] Let Concat properly handle concat dim > 2^31 when dealing with Very Large Tensors. Change: 123258451 --- tensorflow/core/kernels/concat_op.cc | 2 +- tensorflow/python/kernel_tests/concat_op_test.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/concat_op.cc b/tensorflow/core/kernels/concat_op.cc index 36cd60a9da0..9af6dfb8d5a 100644 --- a/tensorflow/core/kernels/concat_op.cc +++ b/tensorflow/core/kernels/concat_op.cc @@ -76,7 +76,7 @@ class ConcatOp : public OpKernel { for (int d = 0; d < concat_dim; ++d) { inputs_flat_dim0 *= input_shape.dim_size(d); } - int output_concat_dim = 0; + int64 output_concat_dim = 0; const bool input_is_scalar = IsLegacyScalar(input_shape); for (int i = 0; i < N; ++i) { const auto in = values[i]; diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 038799681cb..97452a791d0 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -412,6 +412,17 @@ class ConcatOpTest(tf.test.TestCase): self.assertEqual(n + 3, after - before) print("graph = ", [x.name for x in g.get_operations()]) + def testConcatLargeTensors(self): + # CPU-only test, because it fails on GPUs with <= 4GB memory. + with tf.device("/cpu:0"): + a = tf.ones([2**31 + 6], dtype=tf.int8) + b = tf.zeros([1024], dtype=tf.int8) + onezeros = tf.concat(0, [a, b]) + with self.test_session(use_gpu=False): + # TODO(dga): Add more depth to this test to validate correctness, + # not just non-crashingness, once other large tensor fixes have gone in. 
+ _ = onezeros.eval() + class ConcatOffsetTest(tf.test.TestCase): From 1db1272f7d75131559fa15cc5013ff46735f9c58 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 25 May 2016 14:25:35 -0800 Subject: [PATCH 19/20] Rename tf.contrib.distributions.Gaussian -> tf.contrib.distributions.Normal Change: 123260073 --- tensorflow/contrib/distributions/BUILD | 8 +-- tensorflow/contrib/distributions/__init__.py | 12 ++--- ...py => normal_conjugate_posteriors_test.py} | 34 ++++++------ .../{gaussian_test.py => normal_test.py} | 46 ++++++++-------- .../python/ops/{gaussian.py => normal.py} | 44 ++++++++-------- ...iors.py => normal_conjugate_posteriors.py} | 52 +++++++++---------- 6 files changed, 98 insertions(+), 98 deletions(-) rename tensorflow/contrib/distributions/python/kernel_tests/{gaussian_conjugate_posteriors_test.py => normal_conjugate_posteriors_test.py} (74%) rename tensorflow/contrib/distributions/python/kernel_tests/{gaussian_test.py => normal_test.py} (79%) rename tensorflow/contrib/distributions/python/ops/{gaussian.py => normal.py} (82%) rename tensorflow/contrib/distributions/python/ops/{gaussian_conjugate_posteriors.py => normal_conjugate_posteriors.py} (73%) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 451a34320e0..44263dc8aed 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -55,9 +55,9 @@ cuda_py_tests( ) cuda_py_tests( - name = "gaussian_test", + name = "normal_test", size = "small", - srcs = ["python/kernel_tests/gaussian_test.py"], + srcs = ["python/kernel_tests/normal_test.py"], additional_deps = [ ":distributions_py", "//tensorflow/python:framework_test_lib", @@ -98,9 +98,9 @@ cuda_py_tests( ) cuda_py_tests( - name = "gaussian_conjugate_posteriors_test", + name = "normal_conjugate_posteriors_test", size = "small", - srcs = ["python/kernel_tests/gaussian_conjugate_posteriors_test.py"], + srcs = ["python/kernel_tests/normal_conjugate_posteriors_test.py"], additional_deps = [ ":distributions_py", "//tensorflow/python:platform_test", diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 2c8a0343b28..7fa8c0fb0c6 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -30,7 +30,7 @@ initialized with parameters that define the distributions. @@Chi2 @@Exponential @@Gamma -@@Gaussian +@@Normal @@StudentT @@Uniform @@ -44,10 +44,10 @@ initialized with parameters that define the distributions. Functions that transform conjugate prior/likelihood pairs to distributions representing the posterior or posterior predictive. -### Gaussian likelihood with conjugate prior. +### Normal likelihood with conjugate prior. 
-@@gaussian_conjugates_known_sigma_posterior -@@gaussian_congugates_known_sigma_predictive +@@normal_conjugates_known_sigma_posterior +@@normal_congugates_known_sigma_predictive """ from __future__ import absolute_import from __future__ import division @@ -60,8 +60,8 @@ from tensorflow.contrib.distributions.python.ops.dirichlet_multinomial import * from tensorflow.contrib.distributions.python.ops.distribution import * from tensorflow.contrib.distributions.python.ops.exponential import * from tensorflow.contrib.distributions.python.ops.gamma import * -from tensorflow.contrib.distributions.python.ops.gaussian import * -from tensorflow.contrib.distributions.python.ops.gaussian_conjugate_posteriors import * from tensorflow.contrib.distributions.python.ops.mvn import * +from tensorflow.contrib.distributions.python.ops.normal import * +from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import * from tensorflow.contrib.distributions.python.ops.student_t import * from tensorflow.contrib.distributions.python.ops.uniform import * diff --git a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_conjugate_posteriors_test.py b/tensorflow/contrib/distributions/python/kernel_tests/normal_conjugate_posteriors_test.py similarity index 74% rename from tensorflow/contrib/distributions/python/kernel_tests/gaussian_conjugate_posteriors_test.py rename to tensorflow/contrib/distributions/python/kernel_tests/normal_conjugate_posteriors_test.py index c3a2464b5bd..1d03396bf68 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_conjugate_posteriors_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/normal_conjugate_posteriors_test.py @@ -25,9 +25,9 @@ import tensorflow as tf distributions = tf.contrib.distributions -class GaussianTest(tf.test.TestCase): +class NormalTest(tf.test.TestCase): - def testGaussianConjugateKnownSigmaPosterior(self): + def testNormalConjugateKnownSigmaPosterior(self): with tf.Session(): mu0 = tf.constant([3.0]) sigma0 = tf.constant([math.sqrt(10.0)]) @@ -35,16 +35,16 @@ class GaussianTest(tf.test.TestCase): x = tf.constant([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]) s = tf.reduce_sum(x) n = tf.size(x) - prior = distributions.Gaussian(mu=mu0, sigma=sigma0) - posterior = distributions.gaussian_conjugates_known_sigma_posterior( + prior = distributions.Normal(mu=mu0, sigma=sigma0) + posterior = distributions.normal_conjugates_known_sigma_posterior( prior=prior, sigma=sigma, s=s, n=n) # Smoke test - self.assertTrue(isinstance(posterior, distributions.Gaussian)) + self.assertTrue(isinstance(posterior, distributions.Normal)) posterior_log_pdf = posterior.log_pdf(x).eval() self.assertEqual(posterior_log_pdf.shape, (6,)) - def testGaussianConjugateKnownSigmaPosteriorND(self): + def testNormalConjugateKnownSigmaPosteriorND(self): with tf.Session(): batch_size = 6 mu0 = tf.constant([[3.0, -3.0]] * batch_size) @@ -54,16 +54,16 @@ class GaussianTest(tf.test.TestCase): tf.constant([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=tf.float32)) s = tf.reduce_sum(x) n = tf.size(x) - prior = distributions.Gaussian(mu=mu0, sigma=sigma0) - posterior = distributions.gaussian_conjugates_known_sigma_posterior( + prior = distributions.Normal(mu=mu0, sigma=sigma0) + posterior = distributions.normal_conjugates_known_sigma_posterior( prior=prior, sigma=sigma, s=s, n=n) # Smoke test - self.assertTrue(isinstance(posterior, distributions.Gaussian)) + self.assertTrue(isinstance(posterior, distributions.Normal)) posterior_log_pdf = posterior.log_pdf(x).eval() 
self.assertEqual(posterior_log_pdf.shape, (6, 2)) - def testGaussianConjugateKnownSigmaNDPosteriorND(self): + def testNormalConjugateKnownSigmaNDPosteriorND(self): with tf.Session(): batch_size = 6 mu0 = tf.constant([[3.0, -3.0]] * batch_size) @@ -75,19 +75,19 @@ class GaussianTest(tf.test.TestCase): s = tf.reduce_sum(x, reduction_indices=[1]) x = tf.transpose(x) # Reshape to shape (6, 2) n = tf.constant([6] * 2) - prior = distributions.Gaussian(mu=mu0, sigma=sigma0) - posterior = distributions.gaussian_conjugates_known_sigma_posterior( + prior = distributions.Normal(mu=mu0, sigma=sigma0) + posterior = distributions.normal_conjugates_known_sigma_posterior( prior=prior, sigma=sigma, s=s, n=n) # Smoke test - self.assertTrue(isinstance(posterior, distributions.Gaussian)) + self.assertTrue(isinstance(posterior, distributions.Normal)) # Calculate log_pdf under the 2 models posterior_log_pdf = posterior.log_pdf(x) self.assertEqual(posterior_log_pdf.get_shape(), (6, 2)) self.assertEqual(posterior_log_pdf.eval().shape, (6, 2)) - def testGaussianConjugateKnownSigmaPredictive(self): + def testNormalConjugateKnownSigmaPredictive(self): with tf.Session(): batch_size = 6 mu0 = tf.constant([3.0] * batch_size) @@ -96,12 +96,12 @@ class GaussianTest(tf.test.TestCase): x = tf.constant([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]) s = tf.reduce_sum(x) n = tf.size(x) - prior = distributions.Gaussian(mu=mu0, sigma=sigma0) - predictive = distributions.gaussian_congugates_known_sigma_predictive( + prior = distributions.Normal(mu=mu0, sigma=sigma0) + predictive = distributions.normal_congugates_known_sigma_predictive( prior=prior, sigma=sigma, s=s, n=n) # Smoke test - self.assertTrue(isinstance(predictive, distributions.Gaussian)) + self.assertTrue(isinstance(predictive, distributions.Normal)) predictive_log_pdf = predictive.log_pdf(x).eval() self.assertEqual(predictive_log_pdf.shape, (6,)) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_test.py b/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py similarity index 79% rename from tensorflow/contrib/distributions/python/kernel_tests/gaussian_test.py rename to tensorflow/contrib/distributions/python/kernel_tests/normal_test.py index f0a82df901c..0e9f8a40cca 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/gaussian_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/normal_test.py @@ -24,9 +24,9 @@ import numpy as np import tensorflow as tf -class GaussianTest(tf.test.TestCase): +class NormalTest(tf.test.TestCase): - def testGaussianLogPDF(self): + def testNormalLogPDF(self): with tf.Session(): batch_size = 6 mu = tf.constant([3.0] * batch_size) @@ -34,18 +34,18 @@ class GaussianTest(tf.test.TestCase): mu_v = 3.0 sigma_v = np.sqrt(10.0) x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) - gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma) + normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma) expected_log_pdf = np.log( 1 / np.sqrt(2 * np.pi) / sigma_v * np.exp(-1.0 / (2 * sigma_v**2) * (x - mu_v)**2)) - log_pdf = gaussian.log_pdf(x) + log_pdf = normal.log_pdf(x) self.assertAllClose(expected_log_pdf, log_pdf.eval()) - pdf = gaussian.pdf(x) + pdf = normal.pdf(x) self.assertAllClose(np.exp(expected_log_pdf), pdf.eval()) - def testGaussianLogPDFMultidimensional(self): + def testNormalLogPDFMultidimensional(self): with tf.Session(): batch_size = 6 mu = tf.constant([[3.0, -3.0]] * batch_size) @@ -53,22 +53,22 @@ class GaussianTest(tf.test.TestCase): mu_v = np.array([3.0, 
-3.0]) sigma_v = np.array([np.sqrt(10.0), np.sqrt(15.0)]) x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T - gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma) + normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma) expected_log_pdf = np.log( 1 / np.sqrt(2 * np.pi) / sigma_v * np.exp(-1.0 / (2 * sigma_v**2) * (x - mu_v)**2)) - log_pdf = gaussian.log_pdf(x) + log_pdf = normal.log_pdf(x) log_pdf_values = log_pdf.eval() self.assertEqual(log_pdf.get_shape(), (6, 2)) self.assertAllClose(expected_log_pdf, log_pdf_values) - pdf = gaussian.pdf(x) + pdf = normal.pdf(x) pdf_values = pdf.eval() self.assertEqual(pdf.get_shape(), (6, 2)) self.assertAllClose(np.exp(expected_log_pdf), pdf_values) - def testGaussianCDF(self): + def testNormalCDF(self): with tf.Session(): batch_size = 6 mu = tf.constant([3.0] * batch_size) @@ -77,40 +77,40 @@ class GaussianTest(tf.test.TestCase): sigma_v = np.sqrt(10.0) x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0], dtype=np.float32) - gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma) + normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma) erf_fn = np.vectorize(math.erf) # From Wikipedia expected_cdf = 0.5 * (1.0 + erf_fn((x - mu_v)/(sigma_v*np.sqrt(2)))) - cdf = gaussian.cdf(x) + cdf = normal.cdf(x) self.assertAllClose(expected_cdf, cdf.eval()) - def testGaussianEntropy(self): + def testNormalEntropy(self): with tf.Session(): mu_v = np.array([1.0, 1.0, 1.0]) sigma_v = np.array([[1.0, 2.0, 3.0]]).T - gaussian = tf.contrib.distributions.Gaussian(mu=mu_v, sigma=sigma_v) + normal = tf.contrib.distributions.Normal(mu=mu_v, sigma=sigma_v) sigma_broadcast = mu_v * sigma_v expected_entropy = 0.5 * np.log(2*np.pi*np.exp(1)*sigma_broadcast**2) - self.assertAllClose(expected_entropy, gaussian.entropy().eval()) + self.assertAllClose(expected_entropy, normal.entropy().eval()) - def testGaussianSample(self): + def testNormalSample(self): with tf.Session(): mu = tf.constant(3.0) sigma = tf.constant(math.sqrt(10.0)) mu_v = 3.0 sigma_v = np.sqrt(10.0) n = tf.constant(100000) - gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma) - samples = gaussian.sample(n, seed=137) + normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma) + samples = normal.sample(n, seed=137) sample_values = samples.eval() self.assertEqual(sample_values.shape, (100000,)) self.assertAllClose(sample_values.mean(), mu_v, atol=1e-2) self.assertAllClose(sample_values.std(), sigma_v, atol=1e-1) - def testGaussianSampleMultiDimensional(self): + def testNormalSampleMultiDimensional(self): with tf.Session(): batch_size = 2 mu = tf.constant([[3.0, -3.0]] * batch_size) @@ -118,8 +118,8 @@ class GaussianTest(tf.test.TestCase): mu_v = [3.0, -3.0] sigma_v = [np.sqrt(10.0), np.sqrt(15.0)] n = tf.constant(100000) - gaussian = tf.contrib.distributions.Gaussian(mu=mu, sigma=sigma) - samples = gaussian.sample(n, seed=137) + normal = tf.contrib.distributions.Normal(mu=mu, sigma=sigma) + samples = normal.sample(n, seed=137) sample_values = samples.eval() self.assertEqual(samples.get_shape(), (100000, batch_size, 2)) self.assertAllClose(sample_values[:, 0, 0].mean(), mu_v[0], atol=1e-2) @@ -129,13 +129,13 @@ class GaussianTest(tf.test.TestCase): def testNegativeSigmaFails(self): with tf.Session(): - gaussian = tf.contrib.distributions.Gaussian( + normal = tf.contrib.distributions.Normal( mu=[1.], sigma=[-5.], name='G') with self.assertRaisesOpError( r'should contain only positive values'): - gaussian.mean.eval() + normal.mean.eval() if __name__ == '__main__': 
tf.test.main() diff --git a/tensorflow/contrib/distributions/python/ops/gaussian.py b/tensorflow/contrib/distributions/python/ops/normal.py similarity index 82% rename from tensorflow/contrib/distributions/python/ops/gaussian.py rename to tensorflow/contrib/distributions/python/ops/normal.py index 8e2049444af..dc08a0e1dec 100644 --- a/tensorflow/contrib/distributions/python/ops/gaussian.py +++ b/tensorflow/contrib/distributions/python/ops/normal.py @@ -38,8 +38,8 @@ def _assert_all_positive(x): ["Tensor %s should contain only positive values: " % x.name, x]) -class Gaussian(object): - """The scalar Gaussian distribution with mean and stddev parameters mu, sigma. +class Normal(object): + """The scalar Normal distribution with mean and stddev parameters mu, sigma. #### Mathematical details @@ -52,15 +52,15 @@ class Gaussian(object): Examples of initialization of one or a batch of distributions. ```python - # Define a single scalar Gaussian distribution. - dist = tf.contrib.distributions.Gaussian(mu=0, sigma=3) + # Define a single scalar Normal distribution. + dist = tf.contrib.distributions.Normal(mu=0, sigma=3) # Evaluate the cdf at 1, returning a scalar. dist.cdf(1) - # Define a batch of two scalar valued Gaussians. + # Define a batch of two scalar valued Normals. # The first has mean 1 and standard deviation 11, the second 2 and 22. - dist = tf.contrib.distributions.Gaussian(mu=[1, 2.], sigma=[11, 22.]) + dist = tf.contrib.distributions.Normal(mu=[1, 2.], sigma=[11, 22.]) # Evaluate the pdf of the first distribution on 0, and the second on 1.5, # returning a length two tensor. @@ -73,9 +73,9 @@ class Gaussian(object): Arguments are broadcast when possible. ```python - # Define a batch of two scalar valued Gaussians. + # Define a batch of two scalar valued Normals. # Both have mean 1, but different standard deviations. - dist = tf.contrib.distributions.Gaussian(mu=1, sigma=[11, 22.]) + dist = tf.contrib.distributions.Normal(mu=1, sigma=[11, 22.]) # Evaluate the pdf of both distributions on the same point, 3.0, # returning a length 2 tensor. @@ -85,7 +85,7 @@ class Gaussian(object): """ def __init__(self, mu, sigma, name=None): - """Construct Gaussian distributions with mean and stddev `mu` and `sigma`. + """Construct Normal distributions with mean and stddev `mu` and `sigma`. The parameters `mu` and `sigma` must be shaped in a way that supports broadcasting (e.g. `mu + sigma` is a valid operation). @@ -99,7 +99,7 @@ class Gaussian(object): Raises: TypeError: if mu and sigma are different dtypes. """ - with ops.op_scope([mu, sigma], name, "Gaussian"): + with ops.op_scope([mu, sigma], name, "Normal"): mu = ops.convert_to_tensor(mu) sigma = ops.convert_to_tensor(sigma) with ops.control_dependencies([_assert_all_positive(sigma)]): @@ -125,7 +125,7 @@ class Gaussian(object): return self._mu * array_ops.ones_like(self._sigma) def log_pdf(self, x, name=None): - """Log pdf of observations in `x` under these Gaussian distribution(s). + """Log pdf of observations in `x` under these Normal distribution(s). Args: x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`. @@ -134,7 +134,7 @@ class Gaussian(object): Returns: log_pdf: tensor of dtype `dtype`, the log-PDFs of `x`. """ - with ops.op_scope([self._mu, self._sigma, x], name, "GaussianLogPdf"): + with ops.op_scope([self._mu, self._sigma, x], name, "NormalLogPdf"): x = ops.convert_to_tensor(x) if x.dtype != self.dtype: raise TypeError("Input x dtype does not match dtype: %s vs. 
%s" @@ -144,7 +144,7 @@ class Gaussian(object): -0.5*math_ops.square((x - self._mu) / self._sigma)) def cdf(self, x, name=None): - """CDF of observations in `x` under these Gaussian distribution(s). + """CDF of observations in `x` under these Normal distribution(s). Args: x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`. @@ -153,7 +153,7 @@ class Gaussian(object): Returns: cdf: tensor of dtype `dtype`, the CDFs of `x`. """ - with ops.op_scope([self._mu, self._sigma, x], name, "GaussianCdf"): + with ops.op_scope([self._mu, self._sigma, x], name, "NormalCdf"): x = ops.convert_to_tensor(x) if x.dtype != self.dtype: raise TypeError("Input x dtype does not match dtype: %s vs. %s" @@ -162,7 +162,7 @@ class Gaussian(object): 1.0/(math.sqrt(2.0) * self._sigma)*(x - self._mu))) def log_cdf(self, x, name=None): - """Log CDF of observations `x` under these Gaussian distribution(s). + """Log CDF of observations `x` under these Normal distribution(s). Args: x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`. @@ -171,11 +171,11 @@ class Gaussian(object): Returns: log_cdf: tensor of dtype `dtype`, the log-CDFs of `x`. """ - with ops.op_scope([self._mu, self._sigma, x], name, "GaussianLogCdf"): + with ops.op_scope([self._mu, self._sigma, x], name, "NormalLogCdf"): return math_ops.log(self.cdf(x)) def pdf(self, x, name=None): - """The PDF of observations in `x` under these Gaussian distribution(s). + """The PDF of observations in `x` under these Normal distribution(s). Args: x: tensor of dtype `dtype`, must be broadcastable with `mu` and `sigma`. @@ -184,11 +184,11 @@ class Gaussian(object): Returns: pdf: tensor of dtype `dtype`, the pdf values of `x`. """ - with ops.op_scope([self._mu, self._sigma, x], name, "GaussianPdf"): + with ops.op_scope([self._mu, self._sigma, x], name, "NormalPdf"): return math_ops.exp(self.log_pdf(x)) def entropy(self, name=None): - """The entropy of Gaussian distribution(s). + """The entropy of Normal distribution(s). Args: name: The name to give this op. @@ -196,7 +196,7 @@ class Gaussian(object): Returns: entropy: tensor of dtype `dtype`, the entropy. """ - with ops.op_scope([self._mu, self._sigma], name, "GaussianEntropy"): + with ops.op_scope([self._mu, self._sigma], name, "NormalEntropy"): two_pi_e1 = constant_op.constant( 2 * math.pi * math.exp(1), dtype=self.dtype) # Use broadcasting rules to calculate the full broadcast sigma. @@ -204,7 +204,7 @@ class Gaussian(object): return 0.5 * math_ops.log(two_pi_e1 * math_ops.square(sigma)) def sample(self, n, seed=None, name=None): - """Sample `n` observations from the Gaussian Distributions. + """Sample `n` observations from the Normal Distributions. Args: n: `Scalar`, type int32, the number of observations to sample. @@ -215,7 +215,7 @@ class Gaussian(object): samples: `[n, ...]`, a `Tensor` of `n` samples for each of the distributions determined by broadcasting the hyperparameters. 
""" - with ops.op_scope([self._mu, self._sigma, n], name, "GaussianSample"): + with ops.op_scope([self._mu, self._sigma, n], name, "NormalSample"): broadcast_shape = (self._mu + self._sigma).get_shape() n = ops.convert_to_tensor(n) shape = array_ops.concat( diff --git a/tensorflow/contrib/distributions/python/ops/gaussian_conjugate_posteriors.py b/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py similarity index 73% rename from tensorflow/contrib/distributions/python/ops/gaussian_conjugate_posteriors.py rename to tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py index c0089964152..45ddd3ada36 100644 --- a/tensorflow/contrib/distributions/python/ops/gaussian_conjugate_posteriors.py +++ b/tensorflow/contrib/distributions/python/ops/normal_conjugate_posteriors.py @@ -12,32 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""The Gaussian distribution: conjugate posterior closed form calculations.""" +"""The Normal distribution: conjugate posterior closed form calculations.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.distributions.python.ops.gaussian import Gaussian # pylint: disable=line-too-long +from tensorflow.contrib.distributions.python.ops.normal import Normal # pylint: disable=line-too-long from tensorflow.python.ops import math_ops -def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n): - """Posterior Gaussian distribution with conjugate prior on the mean. +def normal_conjugates_known_sigma_posterior(prior, sigma, s, n): + """Posterior Normal distribution with conjugate prior on the mean. This model assumes that `n` observations (with sum `s`) come from a - Gaussian with unknown mean `mu` (described by the Gaussian `prior`) + Normal with unknown mean `mu` (described by the Normal `prior`) and known variance `sigma^2`. The "known sigma posterior" is the distribution of the unknown `mu`. - Accepts a prior Gaussian distribution object, having parameters + Accepts a prior Normal distribution object, having parameters `mu0` and `sigma0`, as well as known `sigma` values of the predictive - distribution(s) (also assumed Gaussian), + distribution(s) (also assumed Normal), and statistical estimates `s` (the sum(s) of the observations) and `n` (the number(s) of observations). - Returns a posterior (also Gaussian) distribution object, with parameters + Returns a posterior (also Normal) distribution object, with parameters `(mu', sigma'^2)`, where: ``` @@ -50,7 +50,7 @@ def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n): will broadcast in the case of multidimensional sets of parameters. Args: - prior: `Gaussian` object of type `dtype`: + prior: `Normal` object of type `dtype`: the prior distribution having parameters `(mu0, sigma0)`. sigma: tensor of type `dtype`, taking values `sigma > 0`. The known stddev parameter(s). @@ -58,15 +58,15 @@ def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n): n: Tensor of type `int`. The number(s) of observations. Returns: - A new Gaussian posterior distribution object for the unknown observation + A new Normal posterior distribution object for the unknown observation mean `mu`. Raises: TypeError: if dtype of `s` does not match `dtype`, or `prior` is not a - Gaussian object. + Normal object. 
""" - if not isinstance(prior, Gaussian): - raise TypeError("Expected prior to be an instance of type Gaussian") + if not isinstance(prior, Normal): + raise TypeError("Expected prior to be an instance of type Normal") if s.dtype != prior.dtype: raise TypeError( @@ -77,27 +77,27 @@ def gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n): sigma0_2 = math_ops.square(prior.sigma) sigma_2 = math_ops.square(sigma) sigmap_2 = 1.0/(1/sigma0_2 + n/sigma_2) - return Gaussian( + return Normal( mu=(prior.mu/sigma0_2 + s/sigma_2) * sigmap_2, sigma=math_ops.sqrt(sigmap_2)) -def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n): - """Posterior predictive Gaussian distribution w. conjugate prior on the mean. +def normal_congugates_known_sigma_predictive(prior, sigma, s, n): + """Posterior predictive Normal distribution w. conjugate prior on the mean. This model assumes that `n` observations (with sum `s`) come from a - Gaussian with unknown mean `mu` (described by the Gaussian `prior`) + Normal with unknown mean `mu` (described by the Normal `prior`) and known variance `sigma^2`. The "known sigma predictive" is the distribution of new observations, conditioned on the existing observations and our prior. - Accepts a prior Gaussian distribution object, having parameters + Accepts a prior Normal distribution object, having parameters `mu0` and `sigma0`, as well as known `sigma` values of the predictive - distribution(s) (also assumed Gaussian), + distribution(s) (also assumed Normal), and statistical estimates `s` (the sum(s) of the observations) and `n` (the number(s) of observations). - Calculates the Gaussian distribution(s) `p(x | sigma^2)`: + Calculates the Normal distribution(s) `p(x | sigma^2)`: ``` p(x | sigma^2) = int N(x | mu, sigma^2) N(mu | prior.mu, prior.sigma^2) dmu @@ -117,7 +117,7 @@ def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n): will broadcast in the case of multidimensional sets of parameters. Args: - prior: `Gaussian` object of type `dtype`: + prior: `Normal` object of type `dtype`: the prior distribution having parameters `(mu0, sigma0)`. sigma: tensor of type `dtype`, taking values `sigma > 0`. The known stddev parameter(s). @@ -125,14 +125,14 @@ def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n): n: Tensor of type `int`. The number(s) of observations. Returns: - A new Gaussian predictive distribution object. + A new Normal predictive distribution object. Raises: TypeError: if dtype of `s` does not match `dtype`, or `prior` is not a - Gaussian object. + Normal object. """ - if not isinstance(prior, Gaussian): - raise TypeError("Expected prior to be an instance of type Gaussian") + if not isinstance(prior, Normal): + raise TypeError("Expected prior to be an instance of type Normal") if s.dtype != prior.dtype: raise TypeError( @@ -143,6 +143,6 @@ def gaussian_congugates_known_sigma_predictive(prior, sigma, s, n): sigma0_2 = math_ops.square(prior.sigma) sigma_2 = math_ops.square(sigma) sigmap_2 = 1.0/(1/sigma0_2 + n/sigma_2) - return Gaussian( + return Normal( mu=(prior.mu/sigma0_2 + s/sigma_2) * sigmap_2, sigma=math_ops.sqrt(sigmap_2 + sigma_2)) From 46d7c44571fd9a9fb9d3ded10146796b34caede5 Mon Sep 17 00:00:00 2001 From: Zongheng Yang Date: Wed, 25 May 2016 14:51:46 -0800 Subject: [PATCH 20/20] Supports negative axes for sparse_reduce_sum(). Useful for cases where the rank of the sparse input is dynamic/unknown, and the desired axes are static constants (e.g., -1). 
Change: 123262728 --- .../core/kernels/sparse_reduce_sum_op.cc | 8 +++ tensorflow/core/ops/sparse_ops.cc | 3 +- .../python/kernel_tests/sparse_ops_test.py | 61 +++++++++++++------ tensorflow/python/ops/sparse_ops.py | 5 +- 4 files changed, 54 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/kernels/sparse_reduce_sum_op.cc b/tensorflow/core/kernels/sparse_reduce_sum_op.cc index 02b64c48479..20233b120d2 100644 --- a/tensorflow/core/kernels/sparse_reduce_sum_op.cc +++ b/tensorflow/core/kernels/sparse_reduce_sum_op.cc @@ -74,6 +74,14 @@ class SparseReduceSumOp : public OpKernel { std::vector axes(num_reduction_axes); std::copy_n(reduction_axes_t->flat().data(), num_reduction_axes, axes.begin()); + for (int i = 0; i < num_reduction_axes; ++i) { + int32 axis = axes[i]; + OP_REQUIRES( + ctx, axis >= -ndims && axis < ndims, + errors::InvalidArgument("Invalid reduction dimension ", axis, + ", for input with ", ndims, " dimensions.")); + axes[i] = (axes[i] + ndims) % ndims; + } std::sort(axes.begin(), axes.end()); std::vector group_by_dims; diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index c8f4f8d25b0..378733f59b8 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -430,7 +430,8 @@ Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless with length 1. If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -with a single element is returned. +with a single element is returned. Additionally, the axes can be negative, +which are interpreted according to the indexing rules in Python. input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a SparseTensor, possibly not in canonical ordering. diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py index 6b046883d4d..037d1f2c3eb 100644 --- a/tensorflow/python/kernel_tests/sparse_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops_test.py @@ -417,16 +417,27 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase): class SparseReduceSumTest(test_util.TensorFlowTestCase): - def _compare(self, sp_t, reduction_axes, keep_dims): + # [[1, ?, 1] + # [?, 1, ?]] + # where ? is implictly-zero. + ind = np.array([[0, 0], [0, 2], [1, 1]]).astype(np.int64) + vals = np.array([1, 1, 1]).astype(np.int32) + shape = np.array([2, 3]).astype(np.int64) + + def _compare(self, sp_t, reduction_axes, ndims, keep_dims): densified = sparse_ops.sparse_tensor_to_dense(sp_t).eval() np_ans = densified if reduction_axes is None: np_ans = np.sum(np_ans, keepdims=keep_dims) else: - if isinstance(reduction_axes, list): - reduction_axes = sorted(reduction_axes) # loop below depends on sorted + if not isinstance(reduction_axes, list): # Single scalar. + reduction_axes = [reduction_axes] reduction_axes = np.array(reduction_axes).astype(np.int32) + # Handles negative axes. + reduction_axes = (reduction_axes + ndims) % ndims + # Loop below depends on sorted. 
+ reduction_axes.sort() for ra in reduction_axes.ravel()[::-1]: np_ans = np.sum(np_ans, axis=ra, keepdims=keep_dims) @@ -436,25 +447,21 @@ class SparseReduceSumTest(test_util.TensorFlowTestCase): self.assertAllClose(np_ans, out) - def _compare_all(self, sp_t, reduction_axes): - self._compare(sp_t, reduction_axes, False) - self._compare(sp_t, reduction_axes, True) + def _compare_all(self, sp_t, reduction_axes, ndims): + self._compare(sp_t, reduction_axes, ndims, False) + self._compare(sp_t, reduction_axes, ndims, True) def testSimpleAndRandomInputs(self): - # [[1, ?, 1] - # [?, 1, ?]] - # where ? is implictly-zero. - ind = np.array([[0, 0], [0, 2], [1, 1]]).astype(np.int64) - vals = np.array([1, 1, 1]).astype(np.int32) - shape = np.array([2, 3]).astype(np.int64) - sp_t = ops.SparseTensor(ind, vals, shape) + sp_t = ops.SparseTensor(self.ind, self.vals, self.shape) with self.test_session(use_gpu=False): - self._compare_all(sp_t, None) - self._compare_all(sp_t, 0) - self._compare_all(sp_t, [1]) - self._compare_all(sp_t, [0, 1]) - self._compare_all(sp_t, [1, 0]) + self._compare_all(sp_t, None, ndims=2) + self._compare_all(sp_t, 0, ndims=2) + self._compare_all(sp_t, [1], ndims=2) + self._compare_all(sp_t, [0, 1], ndims=2) + self._compare_all(sp_t, [1, 0], ndims=2) + self._compare_all(sp_t, [-1], ndims=2) + self._compare_all(sp_t, [1, -2], ndims=2) np.random.seed(1618) test_dims = [(1618, 1, 11, 7, 1), (1,), (1, 1, 1)] @@ -462,11 +469,19 @@ class SparseReduceSumTest(test_util.TensorFlowTestCase): for dims in test_dims: sp_t, unused_nnz = _sparsify(np.random.randn(*dims)) # reduce all using None - self._compare_all(sp_t, None) + self._compare_all(sp_t, None, ndims=len(dims)) # reduce random axes from 1D to N-D for d in range(1, len(dims) + 1): axes = np.random.choice(len(dims), size=d, replace=False).tolist() - self._compare_all(sp_t, axes) + self._compare_all(sp_t, axes, ndims=len(dims)) + + def testInvalidAxes(self): + sp_t = ops.SparseTensor(self.ind, self.vals, self.shape) + with self.test_session(use_gpu=False): + with self.assertRaisesOpError("Invalid reduction dimension -3"): + sparse_ops.sparse_reduce_sum(sp_t, -3).eval() + with self.assertRaisesOpError("Invalid reduction dimension 2"): + sparse_ops.sparse_reduce_sum(sp_t, 2).eval() def testGradient(self): np.random.seed(8161) @@ -483,6 +498,12 @@ class SparseReduceSumTest(test_util.TensorFlowTestCase): reduced.eval().shape) self.assertLess(err, 1e-3) + # Tests for negative axes. + reduced = sparse_ops.sparse_reduce_sum(sp_t, -1) + err = tf.test.compute_gradient_error(sp_t.values, (nnz,), reduced, + reduced.eval().shape) + self.assertLess(err, 1e-3) + class SparseMathOpsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 4df0e9c5d8e..fbce1103fcc 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -548,7 +548,8 @@ def sparse_reduce_sum(sp_input, reduction_axes=None, keep_dims=False): with length 1. If `reduction_axes` has no entries, all dimensions are reduced, and a tensor - with a single element is returned. + with a single element is returned. Additionally, the axes can be negative, + similar to the indexing rules in Python. For example: @@ -558,7 +559,7 @@ def sparse_reduce_sum(sp_input, reduction_axes=None, keep_dims=False): # where ? is implictly-zero. 
tf.sparse_reduce_sum(x) ==> 3 tf.sparse_reduce_sum(x, 0) ==> [1, 1, 1] - tf.sparse_reduce_sum(x, 1) ==> [2, 1] + tf.sparse_reduce_sum(x, 1) ==> [2, 1] # Can also use -1 as the axis. tf.sparse_reduce_sum(x, 1, keep_dims=True) ==> [[2], [1]] tf.sparse_reduce_sum(x, [0, 1]) ==> 3 ```
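
The posterior update implemented by `normal_conjugates_known_sigma_posterior` in patch 19 has a short closed form, and a few lines of NumPy reproduce it. The sketch below mirrors the scalar constants from `testNormalConjugateKnownSigmaPosterior`; it is a numerical check of the formulas only, not code from the patches:

```python
import numpy as np

# Prior N(mu0, sigma0^2) on the unknown mean; known observation stddev sigma.
mu0, sigma0 = 3.0, np.sqrt(10.0)
sigma = np.sqrt(2.0)

x = np.array([-2.5, 2.5, 4.0, 0.0, -1.0, 2.0])
s, n = x.sum(), x.size  # sufficient statistics: sum and count

# Posterior precision is the sum of prior precision and data precision:
# sigma'^2 = 1 / (1/sigma0^2 + n/sigma^2)
sigmap_2 = 1.0 / (1.0 / sigma0**2 + n / sigma**2)

# Posterior mean is the precision-weighted combination of prior mean and data:
# mu' = (mu0/sigma0^2 + s/sigma^2) * sigma'^2
mup = (mu0 / sigma0**2 + s / sigma**2) * sigmap_2

print(mup, np.sqrt(sigmap_2))
# The predictive distribution in normal_congugates_known_sigma_predictive
# uses the same mu' but adds the observation noise back into the variance:
# sigma_pred^2 = sigma'^2 + sigma^2.
```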