Merge pull request #2518 from vrv/branch_123332988

Upstream changes from internal

Vijay Vasudevan committed on 2016-05-26 15:08:06 -07:00 (commit 15e51e6113)
602 changed files with 943 additions and 321 deletions


@ -53,6 +53,7 @@ def optimize_loss(loss,
clip_gradients=None, clip_gradients=None,
moving_average_decay=0.9, moving_average_decay=0.9,
learning_rate_decay_fn=None, learning_rate_decay_fn=None,
update_ops=None,
variables=None, variables=None,
name=None): name=None):
"""Given loss and parameters for optimizer, returns a training op. """Given loss and parameters for optimizer, returns a training op.
@ -81,6 +82,8 @@ def optimize_loss(loss,
Can be used to implement any learning rate decay Can be used to implement any learning rate decay
functions. functions.
For example: tf.train.exponential_decay. For example: tf.train.exponential_decay.
update_ops: list of update `Operation`s to execute at each step. If `None`,
uses elements of UPDATE_OPS collection.
variables: list of variables to optimize or variables: list of variables to optimize or
`None` to use all trainable variables. `None` to use all trainable variables.
name: The name for this operation is used to scope operations and summaries. name: The name for this operation is used to scope operations and summaries.
@ -92,6 +95,15 @@ def optimize_loss(loss,
ValueError: if optimizer is wrong type. ValueError: if optimizer is wrong type.
""" """
with vs.variable_op_scope([loss, global_step], name, "OptimizeLoss"): with vs.variable_op_scope([loss, global_step], name, "OptimizeLoss"):
# Update ops take UPDATE_OPS collection if not provided.
update_ops = (set(update_ops or []) or
set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)))
# Make sure update ops are run before computing loss.
if update_ops:
with ops.control_dependencies(update_ops):
barrier = control_flow_ops.no_op(name="update_barrier")
loss = control_flow_ops.with_dependencies([barrier], loss)
# Moving average of the loss with decay. # Moving average of the loss with decay.
if moving_average_decay is not None: if moving_average_decay is not None:
# Generate moving averages of the loss. # Generate moving averages of the loss.
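
For readers skimming the diff, here is a minimal sketch of the gating pattern the new `update_ops` handling introduces: update ops are forced to run before the loss is evaluated via a control dependency. The variable, loss, and update op below are illustrative placeholders, not part of this change, and only the public `tf` API of this release line is used.

import tensorflow as tf

# Illustrative model state; optimize_loss itself receives these from the caller.
var = tf.Variable(1.0)
loss = 3.0 * var
update_op = tf.assign_add(var, 0.5)  # e.g. a batch-norm moving-average update

# Same idea as the added code: evaluate the loss only after update_op has run.
with tf.control_dependencies([update_op]):
    gated_loss = tf.identity(loss)

train_op = tf.train.GradientDescentOptimizer(0.1).minimize(gated_loss)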


@ -132,6 +132,25 @@ class OptimizersTest(tf.test.TestCase):
tf.contrib.layers.optimize_loss( tf.contrib.layers.optimize_loss(
loss, global_step, learning_rate=0.1, optimizer="SGD") loss, global_step, learning_rate=0.1, optimizer="SGD")
def testUpdateOp(self):
optimizers = ["SGD", tf.train.GradientDescentOptimizer,
tf.train.GradientDescentOptimizer(learning_rate=0.1)]
for optimizer in optimizers:
with tf.Graph().as_default() as g:
with self.test_session(graph=g) as session:
x, var, loss, global_step = _setup_model()
update_op = tf.assign(var, 20)
train = tf.contrib.layers.optimize_loss(loss,
global_step,
learning_rate=0.1,
optimizer=optimizer,
update_ops=[update_op])
tf.initialize_all_variables().run()
session.run(train, feed_dict={x: 5})
var_value, global_step_value = session.run([var, global_step])
# 19.5, due to update of var to 20 before loss computation.
self.assertEqual(var_value, 19.5)
self.assertEqual(global_step_value, 1)
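
The expected 19.5 follows from one SGD step taken after the update op has run; assuming `_setup_model` (not shown in this diff) builds a loss whose gradient with respect to `var` equals the fed `x`, the arithmetic is:

var_after_update = 20.0          # update_ops=[tf.assign(var, 20)] runs first
learning_rate, x = 0.1, 5.0      # values used by the test
var_after_step = var_after_update - learning_rate * x
assert var_after_step == 19.5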
if __name__ == "__main__": if __name__ == "__main__":
tf.test.main() tf.test.main()


@ -195,7 +195,10 @@ def train(graph,
raise ValueError('No "global_step" was provided or found in the graph.') raise ValueError('No "global_step" was provided or found in the graph.')
# TODO(ipolosukhin): Replace all functionality of Supervisor with Monitors. # TODO(ipolosukhin): Replace all functionality of Supervisor with Monitors.
if not monitors: if not supervisor_is_chief:
# Monitors should run only on the chief supervisor.
monitors = []
elif not monitors:
monitors = monitors_lib.get_default_monitors( monitors = monitors_lib.get_default_monitors(
loss_op=loss_op, loss_op=loss_op,
summary_op=logging_ops.get_summary_op(), summary_op=logging_ops.get_summary_op(),


@ -26,8 +26,9 @@ from tensorflow.python.training import input as input_ops
def read_batch_examples(file_pattern, batch_size, reader, def read_batch_examples(file_pattern, batch_size, reader,
randomize_input=True, queue_capacity=10000, randomize_input=True, num_epochs=None,
num_threads=1, name='dequeue_examples'): queue_capacity=10000, num_threads=1,
name=None):
"""Adds operations to read, queue, batch `Example` protos. """Adds operations to read, queue, batch `Example` protos.
Given file pattern (or list of files), will setup a queue for file names, Given file pattern (or list of files), will setup a queue for file names,
@ -46,6 +47,10 @@ def read_batch_examples(file_pattern, batch_size, reader,
reader: A function or class that returns an object with reader: A function or class that returns an object with
`read` method, (filename tensor) -> (example tensor). `read` method, (filename tensor) -> (example tensor).
randomize_input: Whether the input should be randomized. randomize_input: Whether the input should be randomized.
num_epochs: Integer specifying the number of times to read through the
dataset. If `None`, cycles through the dataset forever.
NOTE - If specified, creates a variable that must be initialized, so call
`tf.initialize_all_variables()` as shown in the tests.
queue_capacity: Capacity for input queue. queue_capacity: Capacity for input queue.
num_threads: The number of threads enqueuing examples. num_threads: The number of threads enqueuing examples.
name: Name of resulting op. name: Name of resulting op.
@ -82,39 +87,47 @@ def read_batch_examples(file_pattern, batch_size, reader,
(batch_size, queue_capacity)) (batch_size, queue_capacity))
if (not num_threads) or (num_threads <= 0): if (not num_threads) or (num_threads <= 0):
raise ValueError('Invalid num_threads %s.' % num_threads) raise ValueError('Invalid num_threads %s.' % num_threads)
if (num_epochs is not None) and (num_epochs <= 0):
raise ValueError('Invalid num_epochs %s.' % num_epochs)
with ops.name_scope(name) as scope: with ops.op_scope([file_pattern], name, 'read_batch_examples') as scope:
# Setup filename queue with shuffling. # Setup filename queue with shuffling.
with ops.name_scope('file_name_queue') as file_name_queue_scope: with ops.name_scope('file_name_queue') as file_name_queue_scope:
file_name_queue = input_ops.string_input_producer( file_name_queue = input_ops.string_input_producer(
constant_op.constant(file_names, name='input'), constant_op.constant(file_names, name='input'),
shuffle=randomize_input, name=file_name_queue_scope) shuffle=randomize_input, num_epochs=num_epochs,
name=file_name_queue_scope)
# Create reader and set it to read from filename queue. # Create readers, one per thread and set them to read from filename queue.
with ops.name_scope('read'): with ops.name_scope('read'):
_, example_proto = reader().read(file_name_queue) example_list = []
for _ in range(num_threads):
_, example_proto = reader().read(file_name_queue)
example_list.append([example_proto])
# Setup batching queue. # Setup batching queue given list of read example tensors.
if randomize_input: if randomize_input:
if isinstance(batch_size, ops.Tensor): if isinstance(batch_size, ops.Tensor):
min_after_dequeue = int(queue_capacity * 0.4) min_after_dequeue = int(queue_capacity * 0.4)
else: else:
min_after_dequeue = max(queue_capacity - (3 * batch_size), batch_size) min_after_dequeue = max(queue_capacity - (3 * batch_size), batch_size)
examples = input_ops.shuffle_batch( examples = input_ops.shuffle_batch_join(
[example_proto], batch_size, capacity=queue_capacity, example_list, batch_size, capacity=queue_capacity,
num_threads=num_threads, min_after_dequeue=min_after_dequeue, min_after_dequeue=min_after_dequeue,
name=scope) name=scope)
else: else:
examples = input_ops.batch( examples = input_ops.batch_join(
[example_proto], batch_size, capacity=queue_capacity, example_list, batch_size, capacity=queue_capacity,
num_threads=num_threads, name=scope) name=scope)
return examples return examples
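
A usage sketch for the new `num_epochs` argument, mirroring the `test_read_csv` test later in this diff; the file pattern is a placeholder, and the epoch-limit variable created by `num_epochs` must be initialized before the queue runners start:

import tensorflow as tf

examples = tf.contrib.learn.io.read_batch_examples(
    "data/*.csv", batch_size=32, reader=tf.TextLineReader,
    randomize_input=False, num_epochs=1, queue_capacity=500)

with tf.Session() as session:
    session.run(tf.initialize_all_variables())   # initializes the epoch counter
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(session, coord=coord)
    try:
        while True:
            batch = session.run(examples)        # one batch of raw lines
    except tf.errors.OutOfRangeError:
        pass                                     # one full epoch has been read
    finally:
        coord.request_stop()
        coord.join(threads)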
def read_batch_features(file_pattern, batch_size, features, reader, def read_batch_features(file_pattern, batch_size, features, reader,
randomize_input=True, queue_capacity=10000, randomize_input=True, num_epochs=None,
num_threads=1, name='dequeue_examples'): queue_capacity=10000, reader_num_threads=1,
parser_num_threads=1,
name=None):
"""Adds operations to read, queue, batch and parse `Example` protos. """Adds operations to read, queue, batch and parse `Example` protos.
Given file pattern (or list of files), will setup a queue for file names, Given file pattern (or list of files), will setup a queue for file names,
@ -136,8 +149,13 @@ def read_batch_features(file_pattern, batch_size, features, reader,
reader: A function or class that returns an object with reader: A function or class that returns an object with
`read` method, (filename tensor) -> (example tensor). `read` method, (filename tensor) -> (example tensor).
randomize_input: Whether the input should be randomized. randomize_input: Whether the input should be randomized.
num_epochs: Integer specifying the number of times to read through the
dataset. If None, cycles through the dataset forever. NOTE - If specified,
creates a variable that must be initialized, so call
tf.initialize_all_variables() as shown in the tests.
queue_capacity: Capacity for input queue. queue_capacity: Capacity for input queue.
num_threads: The number of threads enqueuing examples. reader_num_threads: The number of threads to read examples.
parser_num_threads: The number of threads to parse examples.
name: Name of resulting op. name: Name of resulting op.
Returns: Returns:
@ -146,17 +164,29 @@ def read_batch_features(file_pattern, batch_size, features, reader,
Raises: Raises:
ValueError: for invalid inputs. ValueError: for invalid inputs.
""" """
examples = read_batch_examples( with ops.op_scope([file_pattern], name, 'read_batch_features') as scope:
file_pattern, batch_size, reader, randomize_input, examples = read_batch_examples(
queue_capacity, num_threads, name=name) file_pattern, batch_size, reader, randomize_input=randomize_input,
num_epochs=num_epochs, queue_capacity=queue_capacity,
num_threads=reader_num_threads, name=scope)
# Parse features into tensors. # Parse features into tensors in many threads and put on the queue.
return parsing_ops.parse_example(examples, features) features_list = []
for _ in range(parser_num_threads):
features_list.append(parsing_ops.parse_example(examples, features))
return input_ops.batch_join(
features_list,
batch_size=batch_size,
capacity=queue_capacity,
enqueue_many=True,
name='parse_example_batch_join')
def read_batch_record_features(file_pattern, batch_size, features, def read_batch_record_features(file_pattern, batch_size, features,
randomize_input=True, queue_capacity=10000, randomize_input=True, num_epochs=None,
num_threads=1, name='dequeue_record_examples'): queue_capacity=10000, reader_num_threads=1,
parser_num_threads=1,
name='dequeue_record_examples'):
"""Reads TFRecord, queues, batches and parses `Example` proto. """Reads TFRecord, queues, batches and parses `Example` proto.
See more detailed description in `read_examples`. See more detailed description in `read_examples`.
@ -168,8 +198,13 @@ def read_batch_record_features(file_pattern, batch_size, features,
features: A `dict` mapping feature keys to `FixedLenFeature` or features: A `dict` mapping feature keys to `FixedLenFeature` or
`VarLenFeature` values. `VarLenFeature` values.
randomize_input: Whether the input should be randomized. randomize_input: Whether the input should be randomized.
num_epochs: Integer specifying the number of times to read through the
dataset. If None, cycles through the dataset forever. NOTE - If specified,
creates a variable that must be initialized, so call
tf.initialize_all_variables() as shown in the tests.
queue_capacity: Capacity for input queue. queue_capacity: Capacity for input queue.
num_threads: The number of threads enqueuing examples. reader_num_threads: The number of threads to read examples.
parser_num_threads: The number of threads to parse examples.
name: Name of resulting op. name: Name of resulting op.
Returns: Returns:
@ -181,5 +216,6 @@ def read_batch_record_features(file_pattern, batch_size, features,
return read_batch_features( return read_batch_features(
file_pattern=file_pattern, batch_size=batch_size, features=features, file_pattern=file_pattern, batch_size=batch_size, features=features,
reader=io_ops.TFRecordReader, reader=io_ops.TFRecordReader,
randomize_input=randomize_input, randomize_input=randomize_input, num_epochs=num_epochs,
queue_capacity=queue_capacity, num_threads=num_threads, name=name) queue_capacity=queue_capacity, reader_num_threads=reader_num_threads,
parser_num_threads=parser_num_threads, name=name)
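
And a corresponding sketch for the parsed-features entry point with the new threading knobs; the pattern and feature spec are placeholders, and the returned dict is consumed inside a session with queue runners started, exactly as in the sketch above:

import tensorflow as tf

feature_spec = {"age": tf.FixedLenFeature(shape=[1], dtype=tf.int64)}
features = tf.contrib.learn.io.read_batch_record_features(
    "data/*.tfrecord", batch_size=64, features=feature_spec,
    randomize_input=True, num_epochs=None, queue_capacity=10000,
    reader_num_threads=2, parser_num_threads=2)
# features["age"] is a batched tensor; with num_epochs=None the pipeline
# cycles through the files forever, so no epoch-limit variable is created.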


@ -17,10 +17,13 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os
import random import random
import tempfile
import tensorflow as tf import tensorflow as tf
from tensorflow.python.framework import errors
from tensorflow.python.framework import test_util from tensorflow.python.framework import test_util
from tensorflow.python.platform import gfile from tensorflow.python.platform import gfile
@ -55,44 +58,83 @@ class GraphIOTest(tf.test.TestCase):
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "No files match", ValueError, "No files match",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_INVALID_FILE_PATTERN, default_batch_size, None, tf.TFRecordReader, _INVALID_FILE_PATTERN, default_batch_size, tf.TFRecordReader,
False, queue_capacity, False, num_epochs=None, queue_capacity=queue_capacity,
num_threads, name) num_threads=num_threads, name=name)
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "Invalid batch_size", ValueError, "Invalid batch_size",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, None, None, tf.TFRecordReader, _VALID_FILE_PATTERN, None, tf.TFRecordReader,
False, queue_capacity, num_threads, name) False, num_epochs=None, queue_capacity=queue_capacity,
num_threads=num_threads, name=name)
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "Invalid batch_size", ValueError, "Invalid batch_size",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, -1, None, tf.TFRecordReader, _VALID_FILE_PATTERN, -1, tf.TFRecordReader,
False, queue_capacity, num_threads, name) False, num_epochs=None, queue_capacity=queue_capacity,
num_threads=num_threads, name=name)
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "Invalid queue_capacity", ValueError, "Invalid queue_capacity",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, default_batch_size, None, tf.TFRecordReader, _VALID_FILE_PATTERN, default_batch_size, tf.TFRecordReader,
False, None, num_threads, name) False, num_epochs=None, queue_capacity=None,
num_threads=num_threads, name=name)
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "Invalid num_threads", ValueError, "Invalid num_threads",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, default_batch_size, None, tf.TFRecordReader, _VALID_FILE_PATTERN, default_batch_size, tf.TFRecordReader,
False, queue_capacity, None, False, num_epochs=None, queue_capacity=queue_capacity,
name) num_threads=None, name=name)
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "Invalid num_threads", ValueError, "Invalid num_threads",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, default_batch_size, None, tf.TFRecordReader, _VALID_FILE_PATTERN, default_batch_size, tf.TFRecordReader,
False, queue_capacity, -1, False, num_epochs=None, queue_capacity=queue_capacity,
name) num_threads=-1, name=name)
self.assertRaisesRegexp( self.assertRaisesRegexp(
ValueError, "Invalid batch_size", ValueError, "Invalid batch_size",
tf.contrib.learn.io.read_batch_features, tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, queue_capacity + 1, None, tf.TFRecordReader, _VALID_FILE_PATTERN, queue_capacity + 1, tf.TFRecordReader,
False, queue_capacity, 1, name) False, num_epochs=None, queue_capacity=queue_capacity,
num_threads=1, name=name)
self.assertRaisesRegexp(
ValueError, "Invalid num_epochs",
tf.contrib.learn.io.read_batch_examples,
_VALID_FILE_PATTERN, default_batch_size, tf.TFRecordReader,
False, num_epochs=-1, queue_capacity=queue_capacity, num_threads=1,
name=name)
def test_batch_tf_record(self): def test_batch_record_features(self):
batch_size = 17
queue_capacity = 1234
name = "my_batch"
features = {"feature": tf.FixedLenFeature(shape=[0], dtype=tf.float32)}
with tf.Graph().as_default() as g, self.test_session(graph=g) as sess:
features = tf.contrib.learn.io.read_batch_record_features(
_VALID_FILE_PATTERN, batch_size, features, randomize_input=False,
queue_capacity=queue_capacity, reader_num_threads=2,
parser_num_threads=2, name=name)
self.assertEquals("%s/parse_example_batch_join:0" % name,
features["feature"].name)
file_name_queue_name = "%s/file_name_queue" % name
file_names_name = "%s/input" % file_name_queue_name
example_queue_name = "%s/fifo_queue" % name
parse_example_queue_name = "%s/parse_example_batch_join" % name
op_nodes = test_util.assert_ops_in_graph({
file_names_name: "Const",
file_name_queue_name: "FIFOQueue",
"%s/read/TFRecordReader" % name: "TFRecordReader",
example_queue_name: "FIFOQueue",
parse_example_queue_name: "QueueDequeueMany",
name: "QueueDequeueMany"
}, g)
self.assertAllEqual(_FILE_NAMES, sess.run(["%s:0" % file_names_name])[0])
self.assertEqual(
queue_capacity, op_nodes[example_queue_name].attr["capacity"].i)
def test_one_epoch(self):
batch_size = 17 batch_size = 17
queue_capacity = 1234 queue_capacity = 1234
name = "my_batch" name = "my_batch"
@ -100,20 +142,25 @@ class GraphIOTest(tf.test.TestCase):
with tf.Graph().as_default() as g, self.test_session(graph=g) as sess: with tf.Graph().as_default() as g, self.test_session(graph=g) as sess:
inputs = tf.contrib.learn.io.read_batch_examples( inputs = tf.contrib.learn.io.read_batch_examples(
_VALID_FILE_PATTERN, batch_size, _VALID_FILE_PATTERN, batch_size,
reader=tf.TFRecordReader, randomize_input=False, reader=tf.TFRecordReader, randomize_input=True,
num_epochs=1,
queue_capacity=queue_capacity, name=name) queue_capacity=queue_capacity, name=name)
self.assertEquals("%s:0" % name, inputs.name) self.assertEquals("%s:0" % name, inputs.name)
file_name_queue_name = "%s/file_name_queue" % name file_name_queue_name = "%s/file_name_queue" % name
file_name_queue_limit_name = (
"%s/limit_epochs/epochs" % file_name_queue_name)
file_names_name = "%s/input" % file_name_queue_name file_names_name = "%s/input" % file_name_queue_name
example_queue_name = "%s/fifo_queue" % name example_queue_name = "%s/random_shuffle_queue" % name
op_nodes = test_util.assert_ops_in_graph({ op_nodes = test_util.assert_ops_in_graph({
file_names_name: "Const", file_names_name: "Const",
file_name_queue_name: "FIFOQueue", file_name_queue_name: "FIFOQueue",
"%s/read/TFRecordReader" % name: "TFRecordReader", "%s/read/TFRecordReader" % name: "TFRecordReader",
example_queue_name: "FIFOQueue", example_queue_name: "RandomShuffleQueue",
name: "QueueDequeueMany" name: "QueueDequeueMany",
file_name_queue_limit_name: "Variable"
}, g) }, g)
self.assertAllEqual(_FILE_NAMES, sess.run(["%s:0" % file_names_name])[0]) self.assertEqual(
set(_FILE_NAMES), set(sess.run(["%s:0" % file_names_name])[0]))
self.assertEqual( self.assertEqual(
queue_capacity, op_nodes[example_queue_name].attr["capacity"].i) queue_capacity, op_nodes[example_queue_name].attr["capacity"].i)
@ -143,6 +190,34 @@ class GraphIOTest(tf.test.TestCase):
self.assertEqual( self.assertEqual(
queue_capacity, op_nodes[example_queue_name].attr["capacity"].i) queue_capacity, op_nodes[example_queue_name].attr["capacity"].i)
def test_read_csv(self):
gfile.Glob = self._orig_glob
tempdir = tempfile.mkdtemp()
filename = os.path.join(tempdir, "file.csv")
gfile.Open(filename, "w").write("ABC\nDEF\nGHK\n")
batch_size = 1
queue_capacity = 5
name = "my_batch"
with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
inputs = tf.contrib.learn.io.read_batch_examples(
filename, batch_size,
reader=tf.TextLineReader, randomize_input=False,
num_epochs=1, queue_capacity=queue_capacity, name=name)
session.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
tf.train.start_queue_runners(session, coord=coord)
self.assertAllEqual(session.run(inputs), [b"ABC"])
self.assertAllEqual(session.run(inputs), [b"DEF"])
self.assertAllEqual(session.run(inputs), [b"GHK"])
with self.assertRaises(errors.OutOfRangeError):
session.run(inputs)
coord.request_stop()
if __name__ == "__main__": if __name__ == "__main__":
tf.test.main() tf.test.main()


@ -26,15 +26,17 @@ namespace tensorflow {
class SquaredLossUpdater : public DualLossUpdater { class SquaredLossUpdater : public DualLossUpdater {
public: public:
// Closed form solution that decreases the dual squared loss. // Closed form solution that decreases the dual squared loss.
// See page 23 of http://arxiv.org/pdf/1309.2375v2.pdf // See page 23 of http://arxiv.org/pdf/1309.2375v2.pdf for the derivation of
// the update rule when the example weights are equal to 1.0.
// Note: There is a typo in the formula in the paper: the denominator should
// be 1 + ||x_i||^2/(\lambda n) (without the 2 multiplier).
double ComputeUpdatedDual(const double label, const double example_weight, double ComputeUpdatedDual(const double label, const double example_weight,
const double current_dual, const double wx, const double current_dual, const double wx,
const double weighted_example_norm, const double weighted_example_norm,
const double primal_loss_unused, const double primal_loss_unused,
const double dual_loss_unused) const final { const double dual_loss_unused) const final {
const double delta_numerator = (label - current_dual - wx) * example_weight; const double delta_numerator = label - current_dual - wx;
const double delta_denominator = const double delta_denominator = 1 + weighted_example_norm * example_weight;
1 + weighted_example_norm * example_weight * example_weight * 0.5;
return current_dual + delta_numerator / delta_denominator; return current_dual + delta_numerator / delta_denominator;
} }
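
Written out, the closed-form update above (a sketch for unit example weights, reading weighted_example_norm as \|x_i\|^2 / (\lambda n), as the comment indicates) is:

    \alpha_i \leftarrow \alpha_i + \frac{y_i - \alpha_i - w^\top x_i}{1 + \|x_i\|^2 / (\lambda n)}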


@ -455,6 +455,7 @@ class SdcaWithLogisticLossTest(SdcaOptimizerTest):
# TODO(katsiaspis): add a test for the case when examples at the end of an # TODO(katsiaspis): add a test for the case when examples at the end of an
# epoch are repeated, since example id may be duplicated. # epoch are repeated, since example id may be duplicated.
class SdcaWithLinearLossTest(SdcaOptimizerTest): class SdcaWithLinearLossTest(SdcaOptimizerTest):
"""SDCA optimizer test class for linear (squared) loss.""" """SDCA optimizer test class for linear (squared) loss."""
@ -488,9 +489,11 @@ class SdcaWithLinearLossTest(SdcaOptimizerTest):
self.assertAllClose([-20.0 / 3.0, 28.0 / 3.0], self.assertAllClose([-20.0 / 3.0, 28.0 / 3.0],
predictions.eval(), predictions.eval(),
rtol=0.005) rtol=0.005)
self.assertAllClose(0.01, # Approximate gap should be very close to 0.0. (In fact, because the gap
# is only approximate, it is likely that upon convergence the duality gap
# can have a tiny negative value).
self.assertAllClose(0.00,
lr.approximate_duality_gap().eval(), lr.approximate_duality_gap().eval(),
rtol=1e-2,
atol=1e-2) atol=1e-2)
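
For reference, the duality gap being asserted is the usual primal-minus-dual quantity; by weak duality it is non-negative for exact objectives and zero at the optimum, which is why the test now checks closeness to 0.0 with an absolute tolerance (the approximate gap reported by the optimizer can dip slightly below zero):

    \text{gap}(w, \alpha) = P(w) - D(\alpha) \ge 0, \qquad \text{gap}(w^{*}, \alpha^{*}) = 0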
def testL2Regularization(self): def testL2Regularization(self):
@ -580,7 +583,7 @@ class SdcaWithLinearLossTest(SdcaOptimizerTest):
{'age': [1], {'age': [1],
'gender': [1]}, 14.0, 2.0), 'gender': [1]}, 14.0, 2.0),
] ]
example_weights = [1.0, 1.0] example_weights = [5.0, 3.0]
with self._single_threaded_test_session(): with self._single_threaded_test_session():
examples = make_example_dict(example_protos, example_weights) examples = make_example_dict(example_protos, example_weights)
@ -597,20 +600,30 @@ class SdcaWithLinearLossTest(SdcaOptimizerTest):
for _ in xrange(_MAX_ITERATIONS): for _ in xrange(_MAX_ITERATIONS):
train_op.run() train_op.run()
# Predictions should be 8/9 of label due to minimizing regularized loss: # There are 4 (sparse) variable weights to be learned. 2 for age and 2 for
# (label - 2 * 2 * weight)^2 / 2 + L2 * 2 * weight^2 # gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1,
self.assertAllClose([-10.0 * 8 / 9, 14.0 * 8 / 9], # y_2 be the labels for examples 1 and 2 respectively and s_1, s_2 the
# corresponding *example* weights. With the given feature values, the loss
# function is given by:
# s_1/2(y_1 + 2w_1 + 2w_3)^2 + s_2/2(y_2 - 2w_2 - 2w_4)^2
# + \lambda/2 (w_1^2 + w_2^2 + w_3^2 + w_4^2). Solving for the optimal, it
# can be verified that:
# w_1* = w_3* = -2.0 s_1 y_1/(\lambda + 8 s_1) and
# w_2* = w_4* = 2 \cdot s_2 y_2/(\lambda + 8 s_2). Equivalently, due to
# regularization and example weights, the predictions are within:
# 8 \cdot s_i /(\lambda + 8 \cdot s_i) of the labels.
self.assertAllClose([-10 * 40.0 / 41.0, 14.0 * 24 / 25.0],
predictions.eval(), predictions.eval(),
rtol=0.07) atol=0.01)
def testDenseFeatures(self): def testDenseFeaturesWithDefaultWeights(self):
with self._single_threaded_test_session(): with self._single_threaded_test_session():
examples = make_dense_examples_dict( examples = make_dense_examples_dict(
dense_feature_values=[[-2.0, 0.0], [0.0, 2.0]], dense_feature_values=[[1.0, 0.0], [0.0, 1.0]],
weights=[1.0, 1.0], weights=[1.0, 1.0],
labels=[-10.0, 14.0]) labels=[10.0, -5.0])
variables = make_dense_variable_dict(2, 2) variables = make_dense_variable_dict(2, 2)
options = dict(symmetric_l2_regularization=1, options = dict(symmetric_l2_regularization=1.0,
symmetric_l1_regularization=0, symmetric_l1_regularization=0,
loss_type='squared_loss') loss_type='squared_loss')
lr = SdcaModel(CONTAINER, examples, variables, options) lr = SdcaModel(CONTAINER, examples, variables, options)
@ -621,14 +634,51 @@ class SdcaWithLinearLossTest(SdcaOptimizerTest):
for _ in xrange(_MAX_ITERATIONS): for _ in xrange(_MAX_ITERATIONS):
train_op.run() train_op.run()
# Predictions should be 4/5 of label due to minimizing regularized loss: # The loss function for these particular features is given by:
# (label - 2 * weight)^2 / 2 + L2 * weight^2 # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). So,
self.assertAllClose([-10.0 * 4 / 5, 14.0 * 4 / 5], # differentiating wrt to w_1, w_2 yields the following optimal values:
# w_1* = label_1/(\lambda + 1)= 10/2, w_2* =label_2/(\lambda + 1)= -5/2.
# In this case the (unnormalized regularized) loss will be:
# 1/2(10-5)^2 + 1/2(5-5/2)^2 + 1/2(5^2 + (5/2)^2) = 125.0/4. The actual
# loss should be further normalized by the sum of example weights.
self.assertAllClose([5.0, -2.5],
predictions.eval(), predictions.eval(),
rtol=0.01) rtol=0.01)
loss = lr.regularized_loss(examples) loss = lr.regularized_loss(examples)
self.assertAllClose(148.0 / 10.0, loss.eval(), atol=0.01) self.assertAllClose(125.0 / 8.0, loss.eval(), atol=0.01)
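
A quick plain-Python check of the numbers in the comment above (all values come from the test itself):

lam = 1.0                                             # symmetric_l2_regularization
w1, w2 = 10.0 / (lam + 1.0), -5.0 / (lam + 1.0)       # optimal weights: 5.0, -2.5
unnormalized = (0.5 * (10.0 - w1) ** 2 + 0.5 * (-5.0 - w2) ** 2
                + 0.5 * lam * (w1 ** 2 + w2 ** 2))    # = 125.0 / 4
normalized = unnormalized / (1.0 + 1.0)               # divide by sum of example weights
assert abs(normalized - 125.0 / 8.0) < 1e-9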
def testDenseFeaturesWithArbitraryWeights(self):
with self._single_threaded_test_session():
examples = make_dense_examples_dict(
dense_feature_values=[[1.0, 0.0], [0.0, 1.0]],
weights=[20.0, 10.0],
labels=[10.0, -5.0])
variables = make_dense_variable_dict(2, 2)
options = dict(symmetric_l2_regularization=5.0,
symmetric_l1_regularization=0,
loss_type='squared_loss')
lr = SdcaModel(CONTAINER, examples, variables, options)
tf.initialize_all_variables().run()
predictions = lr.predictions(examples)
train_op = lr.minimize()
for _ in xrange(_MAX_ITERATIONS):
train_op.run()
# The loss function for these particular features is given by:
# 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 +
# \lambda/2 (w_1^2 + w_2^2) where s_1, s_2 are the *example* weights. It
# turns out that the optimal (variable) weights are given by:
# w_1* = label_1 \cdot s_1/(\lambda + s_1)= 8.0 and
# w_2* =label_2 \cdot s_2/(\lambda + s_2)= -10/3.
# In this case the (unnormalized regularized) loss will be:
# s_1/2(8-10)^2 + s_2/2(5-10/3)^2 + 5.0/2(8^2 + (10/3)^2) = 2175.0/9. The
# actual loss should be further normalized by the sum of example weights.
self.assertAllClose([8.0, -10.0/3],
predictions.eval(),
rtol=0.01)
loss = lr.regularized_loss(examples)
self.assertAllClose(2175.0 / 270.0, loss.eval(), atol=0.01)
class SdcaWithHingeLossTest(SdcaOptimizerTest): class SdcaWithHingeLossTest(SdcaOptimizerTest):


@ -19,7 +19,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from tensorflow.contrib.losses.python.losses.loss_ops import absolute_difference from tensorflow.contrib.losses.python.losses.loss_ops import absolute_difference
from tensorflow.contrib.losses.python.losses.loss_ops import add_loss
from tensorflow.contrib.losses.python.losses.loss_ops import cosine_distance from tensorflow.contrib.losses.python.losses.loss_ops import cosine_distance
from tensorflow.contrib.losses.python.losses.loss_ops import get_losses
from tensorflow.contrib.losses.python.losses.loss_ops import get_total_loss
from tensorflow.contrib.losses.python.losses.loss_ops import log from tensorflow.contrib.losses.python.losses.loss_ops import log
from tensorflow.contrib.losses.python.losses.loss_ops import sigmoid_cross_entropy from tensorflow.contrib.losses.python.losses.loss_ops import sigmoid_cross_entropy
from tensorflow.contrib.losses.python.losses.loss_ops import softmax_cross_entropy from tensorflow.contrib.losses.python.losses.loss_ops import softmax_cross_entropy


@ -104,9 +104,11 @@ weighted average over the individual prediction errors:
weight = tf.div(weight, tf.size(weight)) weight = tf.div(weight, tf.size(weight))
loss = tf.contrib.losses.sum_of_squares(predictions, depths, weight) loss = tf.contrib.losses.sum_of_squares(predictions, depths, weight)
@@absolute_difference @@absolute_difference
@@add_loss
@@cosine_distance @@cosine_distance
@@get_losses
@@get_total_loss
@@log @@log
@@sigmoid_cross_entropy @@sigmoid_cross_entropy
@@softmax_cross_entropy @@softmax_cross_entropy
@ -252,6 +254,61 @@ def _num_present(losses, weight, per_batch=False):
return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch) return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch)
def add_loss(loss):
"""Adds a externally defined loss to collection of losses.
Args:
loss: A loss `Tensor`.
"""
ops.add_to_collection(ops.GraphKeys.LOSSES, loss)
def get_losses(scope=None):
"""Gets the list of loss variables.
Args:
scope: an optional scope for filtering the losses to return.
Returns:
a list of loss variables.
"""
return ops.get_collection(ops.GraphKeys.LOSSES, scope)
def get_regularization_losses(scope=None):
"""Gets the regularization losses.
Args:
scope: an optional scope for filtering the losses to return.
Returns:
A list of loss variables.
"""
return ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES, scope)
def get_total_loss(add_regularization_losses=True, name="total_loss"):
"""Returns a tensor whose value represents the total loss.
Notice that the function adds the given losses to the regularization losses.
Args:
add_regularization_losses: A boolean indicating whether or not to use the
regularization losses in the sum.
name: The name of the returned tensor.
Returns:
A `Tensor` whose value represents the total loss.
Raises:
ValueError: if `losses` is not iterable.
"""
losses = get_losses()
if add_regularization_losses:
losses += get_regularization_losses()
return math_ops.add_n(losses, name=name)
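
A short usage sketch of the new collection helpers, assuming they are exported at `tf.contrib.losses` as the import and documentation hunks above suggest; the tensors are illustrative and are registered explicitly, so the example does not rely on the built-in losses adding themselves to the collection:

import tensorflow as tf

predictions = tf.constant([0.5, 1.5])
targets = tf.constant([1.0, 1.0])

# Register externally computed losses in the LOSSES collection ...
tf.contrib.losses.add_loss(tf.reduce_mean(tf.abs(predictions - targets)))
tf.contrib.losses.add_loss(tf.reduce_mean(tf.square(predictions - targets)))

# ... inspect what has been collected, and sum everything (plus any
# REGULARIZATION_LOSSES) into a single training objective.
collected = tf.contrib.losses.get_losses()
total_loss = tf.contrib.losses.get_total_loss()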
def absolute_difference(predictions, targets, weight=1.0, scope=None): def absolute_difference(predictions, targets, weight=1.0, scope=None):
"""Adds an Absolute Difference loss to the training procedure. """Adds an Absolute Difference loss to the training procedure.


@ -16,6 +16,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/simple_placer.h" #include "tensorflow/core/common_runtime/simple_placer.h"
#include <memory> #include <memory>
#include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
@ -182,6 +183,7 @@ class ColocationGraph {
Status ColocateNodes(const Node& x, const Node& y) { Status ColocateNodes(const Node& x, const Node& y) {
int x_root = FindRoot(x.id()); int x_root = FindRoot(x.id());
int y_root = FindRoot(y.id()); int y_root = FindRoot(y.id());
Status s; Status s;
if (x_root != y_root) { if (x_root != y_root) {
// Merge the sets by swinging the parent pointer of the smaller // Merge the sets by swinging the parent pointer of the smaller
@ -229,6 +231,12 @@ class ColocationGraph {
s.error_message()); s.error_message());
} }
// Transfer ids in the old group to the new one.
members_[new_root].ids_in_group.insert(
members_[old_root].ids_in_group.begin(),
members_[old_root].ids_in_group.end());
members_[old_root].ids_in_group.clear();
// Ensure that the common root has at least one supported device // Ensure that the common root has at least one supported device
// type, by computing the intersection of // type, by computing the intersection of
// members_[new_root].supported_device_types and // members_[new_root].supported_device_types and
@ -267,6 +275,9 @@ class ColocationGraph {
return Status::OK(); return Status::OK();
} }
// String containing additional debugging info on failures.
string debug_info;
// We have not yet computed the possible devices for the // We have not yet computed the possible devices for the
// colocated node set containing 'node', so we do so now using the // colocated node set containing 'node', so we do so now using the
// constraints on the root node. // constraints on the root node.
@ -310,6 +321,8 @@ class ColocationGraph {
// Return an error when a physical device that matches an explicit // Return an error when a physical device that matches an explicit
// device specification is not found. This ensures that we don't // device specification is not found. This ensures that we don't
// assign a node to GPU when the user wanted to force it on CPU. // assign a node to GPU when the user wanted to force it on CPU.
AddDebugInfo(node_root, &debug_info);
DeviceNameUtils::ParsedName specified_device_name; DeviceNameUtils::ParsedName specified_device_name;
if (DeviceNameUtils::ParseFullName(node->def().device(), if (DeviceNameUtils::ParseFullName(node->def().device(),
&specified_device_name) && &specified_device_name) &&
@ -334,16 +347,17 @@ class ColocationGraph {
node->def().device(), node->def().device(),
"' because no devices matching that specification " "' because no devices matching that specification "
"are registered in this process; available devices: ", "are registered in this process; available devices: ",
str_util::Join(device_names, ", ")); str_util::Join(device_names, ", "), debug_info);
} else if (specified_device_name.has_type) { } else if (specified_device_name.has_type) {
return errors::InvalidArgument( return errors::InvalidArgument(
"Could not satisfy explicit device specification '", "Could not satisfy explicit device specification '",
node->def().device(), "' because no supported kernel for ", node->def().device(), "' because no supported kernel for ",
specified_device_name.type, " devices is available"); specified_device_name.type, " devices is available.",
debug_info);
} else { } else {
return errors::InvalidArgument( return errors::InvalidArgument(
"Could not satisfy explicit device specification '", "Could not satisfy explicit device specification '",
node->def().device()); node->def().device(), debug_info);
} }
} else { } else {
// The specified device may be a valid device but the // The specified device may be a valid device but the
@ -355,7 +369,7 @@ class ColocationGraph {
"required incompatible device '", "required incompatible device '",
DeviceNameUtils::ParsedNameToString( DeviceNameUtils::ParsedNameToString(
members_[node_root].device_name), members_[node_root].device_name),
"'"); "'", debug_info);
} }
} }
} else { } else {
@ -368,10 +382,11 @@ class ColocationGraph {
device_set_->devices(), members_[node_root].supported_device_types); device_set_->devices(), members_[node_root].supported_device_types);
if (devices.empty()) { if (devices.empty()) {
AddDebugInfo(node_root, &debug_info);
return errors::InvalidArgument( return errors::InvalidArgument(
"Node had no OpKernel registered to support this operation: ", "Node had no OpKernel registered to support this operation: ",
"Operation was ", node->type_string(), " and inputs were ", "Operation was ", node->type_string(), " and inputs were ",
DataTypeVectorString(node->input_types())); DataTypeVectorString(node->input_types()), debug_info);
} }
} }
@ -390,6 +405,15 @@ class ColocationGraph {
// id if it is a root. parent <= 0 indicates that this member is invalid. // id if it is a root. parent <= 0 indicates that this member is invalid.
int parent = -1; int parent = -1;
// The set of ids that are part of the disjoint node set forest.
//
// This is only fully specified in the root of a disjoint
// node set forest.
std::set<int> ids_in_group;
// The type of the op for this node.
string op_type;
// A proxy for the depth of the tree that is used to prefer // A proxy for the depth of the tree that is used to prefer
// connecting smaller trees to larger trees when merging disjoint // connecting smaller trees to larger trees when merging disjoint
// sets. // sets.
@ -410,8 +434,41 @@ class ColocationGraph {
std::vector<Device*> possible_devices; std::vector<Device*> possible_devices;
}; };
// Adds debugging info to 'output' for the node referred to by
// 'node_root'.
void AddDebugInfo(const int node_root, string* output) {
if (members_[node_root].ids_in_group.size() > 1) {
strings::StrAppend(output, "\nColocation Debug Info:\n");
// If this node is part of a colocation group, then we want to
// collect the mapping of ops to supported devices, so that
// the user can see why an unsatisfiable placement occurred.
strings::StrAppend(
output, "Colocation group had the following types and devices: ");
std::unordered_map<string, string> type_to_devices;
for (const int id : members_[node_root].ids_in_group) {
const string& op_type = members_[id].op_type;
string devices_registered;
for (const auto& device_type : members_[id].supported_device_types) {
strings::StrAppend(&devices_registered, DeviceTypeString(device_type),
" ");
}
type_to_devices[op_type] = devices_registered;
}
for (const auto& td : type_to_devices) {
strings::StrAppend(output, "\n", td.first, ": ", td.second);
}
}
}
Status InitializeMember(const Node& node, Member* member) { Status InitializeMember(const Node& node, Member* member) {
const int id = node.id(); const int id = node.id();
member->ids_in_group.insert(id);
member->op_type = node.type_string();
if (id < 0) { if (id < 0) {
return errors::InvalidArgument("Node id was not positive: ", id); return errors::InvalidArgument("Node id was not positive: ", id);
} }


@ -729,6 +729,12 @@ TEST_F(SimplePlacerTest, TestHeterogeneousDeviceSetFailure) {
EXPECT_TRUE(StringPiece(s.error_message()) EXPECT_TRUE(StringPiece(s.error_message())
.contains("colocated with a group of nodes that required " .contains("colocated with a group of nodes that required "
"incompatible device")); "incompatible device"));
// The error message should contain information that indicates which
// op types have which registered device types.
EXPECT_TRUE(StringPiece(s.error_message()).contains("VariableGPU: GPU")) << s;
EXPECT_TRUE(StringPiece(s.error_message()).contains("TestAssign: GPU CPU"))
<< s;
} }
// Test that placement fails when an unknown device is requested. // Test that placement fails when an unknown device is requested.


@ -13,75 +13,68 @@ See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
==============================================================================*/ ==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "third_party/eigen3/Eigen/Core" #include "third_party/eigen3/Eigen/Core"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/kernels/linalg_ops_common.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/binary_linalg_ops_common.h"
namespace tensorflow { namespace tensorflow {
template <typename T> template <typename Scalar, bool SupportsBatchOperationT>
class CholeskyGrad : public OpKernel { class CholeskyGrad
: public BinaryLinearAlgebraOp<Scalar, SupportsBatchOperationT> {
public: public:
explicit CholeskyGrad(OpKernelConstruction* context) : OpKernel(context) {} explicit CholeskyGrad(OpKernelConstruction* context)
: BinaryLinearAlgebraOp<Scalar, SupportsBatchOperationT>(context) {}
~CholeskyGrad() override {}
using Matrix = using Matrix =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>; Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
using ConstMatrixMap = Eigen::Map<const Matrix>; using ConstMatrixMap = Eigen::Map<const Matrix>;
using MatrixMap = Eigen::Map<Matrix>; using MatrixMap = Eigen::Map<Matrix>;
using ConstRef = Eigen::Ref<const Matrix>; using ConstRef = Eigen::Ref<const Matrix>;
using Ref = Eigen::Ref<Matrix>; using Ref = Eigen::Ref<Matrix>;
void Compute(OpKernelContext* context) override { TensorShape GetOutputMatrixShape(
const Tensor& input_tensor_l = context->input(0); const TensorShape& input_matrix_l_full_shape,
const Tensor& input_tensor_grad = context->input(1); const TensorShape& input_matrix_grad_shape) override {
// Check that input tensors represent a matrix. return input_matrix_l_full_shape;
OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_tensor_l.shape()), }
errors::InvalidArgument("In[0] is not a matrix"));
OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_tensor_grad.shape()),
errors::InvalidArgument("In[1] is not a matrix"));
// Check that input tensors are square.
OP_REQUIRES(context,
input_tensor_l.dim_size(0) == input_tensor_l.dim_size(1),
errors::InvalidArgument("Input matrix must be square."));
OP_REQUIRES(context,
input_tensor_grad.dim_size(0) == input_tensor_grad.dim_size(1),
errors::InvalidArgument("Input matrix must be square."));
// Check that input tensors are of same size. int64 GetCostPerUnit(const TensorShape& input_matrix_shape,
OP_REQUIRES(context, const TensorShape& rhs_matrix_shape) override {
input_tensor_l.dim_size(0) == input_tensor_grad.dim_size(0), const int64 rows = input_matrix_shape.dim_size(0);
errors::InvalidArgument("Input matrices must be same size.")); if (rows > (1LL << 20)) {
// A big number to cap the cost in case overflow.
// Create an output tensor return kint64max;
Tensor* output_tensor = NULL; } else {
OP_REQUIRES_OK(context, context->allocate_output( return rows * rows * rows;
0, input_tensor_grad.shape(), &output_tensor));
if (output_tensor->NumElements() == 0) {
// the output shape is a 0-element matrix, so there is nothing to do.
return;
} }
// The next lines are necessary to get Eigen matrix behaviour. }
const ConstMatrixMap input_matrix_l_full(input_tensor_l.flat<T>().data(),
input_tensor_l.dim_size(0),
input_tensor_l.dim_size(1));
const ConstMatrixMap input_matrix_grad(input_tensor_grad.flat<T>().data(),
input_tensor_grad.dim_size(0),
input_tensor_grad.dim_size(1));
MatrixMap output_matrix(output_tensor->template flat<T>().data(),
input_tensor_l.dim_size(0),
input_tensor_l.dim_size(1));
// Algorithm only depends on lower triangular half on input_tensor_l. void ComputeMatrix(OpKernelContext* context,
const ConstMatrixMap& input_matrix_l_full,
const ConstMatrixMap& input_matrix_grad,
MatrixMap* output_matrix) override {
OP_REQUIRES(context,
input_matrix_l_full.rows() == input_matrix_l_full.cols(),
errors::InvalidArgument("Input matrix must be square."));
OP_REQUIRES(
context, input_matrix_l_full.cols() == input_matrix_grad.cols(),
errors::InvalidArgument(
"Input matrix and gradient must have same number of cols."));
OP_REQUIRES(
context, input_matrix_l_full.rows() == input_matrix_grad.rows(),
errors::InvalidArgument(
"Input matrix and gradient must have same number of rows."));
// Algorithm only depends on lower triangular half on input_matrix_l.
const Matrix input_matrix_l = const Matrix input_matrix_l =
input_matrix_l_full.template triangularView<Eigen::Lower>(); input_matrix_l_full.template triangularView<Eigen::Lower>();
// Algorithm only depends on lower triangular half on input_matrix_grad. // Algorithm only depends on lower triangular half on input_matrix_grad.
output_matrix = input_matrix_grad.template triangularView<Eigen::Lower>(); *output_matrix = input_matrix_grad.template triangularView<Eigen::Lower>();
const int64 kMatrixSize = input_matrix_l.rows(); const int64 kMatrixSize = input_matrix_l.rows();
const int64 kMaxBlockSize = 32; const int64 kMaxBlockSize = 32;
@ -104,20 +97,21 @@ class CholeskyGrad : public OpKernel {
auto B = input_matrix_l.block(block_end, 0, trailing_size, block_begin); auto B = input_matrix_l.block(block_end, 0, trailing_size, block_begin);
auto B_bar = auto B_bar =
output_matrix.block(block_end, 0, trailing_size, block_begin); output_matrix->block(block_end, 0, trailing_size, block_begin);
auto C = input_matrix_l.block(block_end, block_begin, trailing_size, auto C = input_matrix_l.block(block_end, block_begin, trailing_size,
block_size); block_size);
auto C_bar = output_matrix.block(block_end, block_begin, trailing_size, auto C_bar = output_matrix->block(block_end, block_begin, trailing_size,
block_size); block_size);
auto D = input_matrix_l.block(block_begin, block_begin, block_size, auto D = input_matrix_l.block(block_begin, block_begin, block_size,
block_size); block_size);
auto D_bar = auto D_bar = output_matrix->block(block_begin, block_begin, block_size,
output_matrix.block(block_begin, block_begin, block_size, block_size); block_size);
auto R = input_matrix_l.block(block_begin, 0, block_size, block_begin); auto R = input_matrix_l.block(block_begin, 0, block_size, block_begin);
auto R_bar = output_matrix.block(block_begin, 0, block_size, block_begin); auto R_bar =
output_matrix->block(block_begin, 0, block_size, block_begin);
C_bar = D.adjoint().template triangularView<Eigen::Upper>() C_bar = D.adjoint().template triangularView<Eigen::Upper>()
.solve(C_bar.adjoint()).adjoint(); .solve(C_bar.adjoint()).adjoint();
@ -127,9 +121,11 @@ class CholeskyGrad : public OpKernel {
CholeskyGradUnblocked(D, D_bar); CholeskyGradUnblocked(D, D_bar);
R_bar -= (D_bar + D_bar.adjoint()) * R; R_bar -= (D_bar + D_bar.adjoint()) * R;
} }
output_matrix = (0.5 * (output_matrix + output_matrix.transpose())).eval(); *output_matrix =
(0.5 * (*output_matrix + output_matrix->transpose())).eval();
} }
void CholeskyGradUnblocked(const ConstRef l_block, Ref grad_block) {
void CholeskyGradUnblocked(const ConstRef& l_block, Ref grad_block) {
const int64 kMatrixSize = l_block.rows(); const int64 kMatrixSize = l_block.rows();
for (int64 k = kMatrixSize - 1; k >= 0; k--) { for (int64 k = kMatrixSize - 1; k >= 0; k--) {
/* This shows the block structure. /* This shows the block structure.
@ -166,6 +162,11 @@ class CholeskyGrad : public OpKernel {
} }
}; };
REGISTER_LINALG_OP("CholeskyGrad", (CholeskyGrad<float>), float); REGISTER_BINARY_LINALG_OP("CholeskyGrad", (CholeskyGrad<float, false>), float);
REGISTER_LINALG_OP("CholeskyGrad", (CholeskyGrad<double>), double); REGISTER_BINARY_LINALG_OP("CholeskyGrad", (CholeskyGrad<double, false>),
double);
REGISTER_BINARY_LINALG_OP("BatchCholeskyGrad", (CholeskyGrad<float, true>),
float);
REGISTER_BINARY_LINALG_OP("BatchCholeskyGrad", (CholeskyGrad<double, true>),
double);
} // namespace tensorflow } // namespace tensorflow


@ -64,8 +64,7 @@ class CudnnScratchAllocator : public perftools::gputools::ScratchAllocator {
AllocatorAttributes(), allocation_attr)); AllocatorAttributes(), allocation_attr));
if (!allocation_status.ok()) { if (!allocation_status.ok()) {
return perftools::gputools::port::StatusOr< return perftools::gputools::port::StatusOr<
perftools::gputools::DeviceMemory<uint8>>( perftools::gputools::DeviceMemory<uint8>>();
AsDeviceMemory<uint8>(nullptr, 0));
} }
// Hold the reference of the allocated tensors until the end of the // Hold the reference of the allocated tensors until the end of the
// allocator. // allocator.


@ -305,7 +305,7 @@ void SpatialAvgPool(OpKernelContext* context, Tensor* output,
const int out_offset = const int out_offset =
(b * params.out_height + ph) * params.out_width + pw; (b * params.out_height + ph) * params.out_width + pw;
out_mat.col(out_offset) += in_mat.col(in_offset); out_mat.col(out_offset) += in_mat.col(in_offset);
out_count(out_offset)++; out_count(out_offset) += T(1);
} }
} }
} }


@ -3175,6 +3175,31 @@ op {
} }
} }
} }
op {
name: "BatchCholeskyGrad"
input_arg {
name: "l"
type_attr: "T"
}
input_arg {
name: "grad"
type_attr: "T"
}
output_arg {
name: "output"
type_attr: "T"
}
attr {
name: "T"
type: "type"
allowed_values {
list {
type: DT_FLOAT
type: DT_DOUBLE
}
}
}
}
op { op {
name: "BatchFFT" name: "BatchFFT"
input_arg { input_arg {


@ -129,11 +129,34 @@ REGISTER_OP("CholeskyGrad")
.Doc(R"doc( .Doc(R"doc(
Calculates the reverse mode backpropagated gradient of the Cholesky algorithm. Calculates the reverse mode backpropagated gradient of the Cholesky algorithm.
For an explanation see "Differentiation of the Cholesky algorithm" by Iain Murray http://arxiv.org/abs/1602.07527. For an explanation see "Differentiation of the Cholesky algorithm" by
Iain Murray http://arxiv.org/abs/1602.07527.
l: Output of Cholesky algorithm l = chol(A). Shape is `[M, M]`. Algorithm depends only on lower triangular part of this matrix. l: Output of Cholesky algorithm l = chol(A). Shape is `[M, M]`.
grad: df/dl where f is some scalar function. Shape is `[M, M]'. Algorithm depends only on lower triangular part of this matrix. Algorithm depends only on lower triangular part of this matrix.
output: Symmetrized version of df/dA . Shape is `[M, M]' grad: df/dl where f is some scalar function. Shape is `[M, M]'.
Algorithm depends only on lower triangular part of this matrix.
output: Symmetrized version of df/dA . Shape is `[M, M]'.
)doc");
REGISTER_OP("BatchCholeskyGrad")
.Input("l: T")
.Input("grad: T")
.Output("output: T")
.Attr("T: {float, double}")
.Doc(R"doc(
Calculates the reverse mode backpropagated gradient of the Cholesky algorithm.
For an explanation see "Differentiation of the Cholesky algorithm" by
Iain Murray http://arxiv.org/abs/1602.07527.
l: Output of batch Cholesky algorithm l = batch_cholesky(A). Shape is `[..., M, M]`.
Algorithm depends only on lower triangular part of the innermost matrices of
this tensor.
grad: df/dl where f is some scalar function. Shape is `[..., M, M]'.
Algorithm depends only on lower triangular part of the innermost matrices of
this tensor.
output: Symmetrized version of df/dA . Shape is `[..., M, M]'
)doc"); )doc");
REGISTER_OP("SelfAdjointEig") REGISTER_OP("SelfAdjointEig")


@ -1397,6 +1397,36 @@ op {
summary: "Calculates the Cholesky decomposition of a batch of square matrices." summary: "Calculates the Cholesky decomposition of a batch of square matrices."
description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices, with the same constraints as the single matrix Cholesky\ndecomposition above. The output is a tensor of the same shape as the input\ncontaining the Cholesky decompositions for all input submatrices `[..., :, :]`." description: "The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions\nform square matrices, with the same constraints as the single matrix Cholesky\ndecomposition above. The output is a tensor of the same shape as the input\ncontaining the Cholesky decompositions for all input submatrices `[..., :, :]`."
} }
op {
name: "BatchCholeskyGrad"
input_arg {
name: "l"
description: "Output of batch Cholesky algorithm l = batch_cholesky(A). Shape is `[..., M, M]`.\nAlgorithm depends only on lower triangular part of the innermost matrices of\nthis tensor."
type_attr: "T"
}
input_arg {
name: "grad"
description: "df/dl where f is some scalar function. Shape is `[..., M, M]\'.\nAlgorithm depends only on lower triangular part of the innermost matrices of\nthis tensor."
type_attr: "T"
}
output_arg {
name: "output"
description: "Symmetrized version of df/dA . Shape is `[..., M, M]\'"
type_attr: "T"
}
attr {
name: "T"
type: "type"
allowed_values {
list {
type: DT_FLOAT
type: DT_DOUBLE
}
}
}
summary: "Calculates the reverse mode backpropagated gradient of the Cholesky algorithm."
description: "For an explanation see \"Differentiation of the Cholesky algorithm\" by\nIain Murray http://arxiv.org/abs/1602.07527."
}
op { op {
name: "BatchFFT" name: "BatchFFT"
input_arg { input_arg {
@ -2482,17 +2512,17 @@ op {
name: "CholeskyGrad" name: "CholeskyGrad"
input_arg { input_arg {
name: "l" name: "l"
description: "Output of Cholesky algorithm l = chol(A). Shape is `[M, M]`. Algorithm depends only on lower triangular part of this matrix." description: "Output of Cholesky algorithm l = chol(A). Shape is `[M, M]`.\nAlgorithm depends only on lower triangular part of this matrix."
type_attr: "T" type_attr: "T"
} }
input_arg { input_arg {
name: "grad" name: "grad"
description: "df/dl where f is some scalar function. Shape is `[M, M]\'. Algorithm depends only on lower triangular part of this matrix." description: "df/dl where f is some scalar function. Shape is `[M, M]\'.\nAlgorithm depends only on lower triangular part of this matrix."
type_attr: "T" type_attr: "T"
} }
output_arg { output_arg {
name: "output" name: "output"
description: "Symmetrized version of df/dA . Shape is `[M, M]\'" description: "Symmetrized version of df/dA . Shape is `[M, M]\'."
type_attr: "T" type_attr: "T"
} }
attr { attr {
@ -2506,7 +2536,7 @@ op {
} }
} }
summary: "Calculates the reverse mode backpropagated gradient of the Cholesky algorithm." summary: "Calculates the reverse mode backpropagated gradient of the Cholesky algorithm."
description: "For an explanation see \"Differentiation of the Cholesky algorithm\" by Iain Murray http://arxiv.org/abs/1602.07527." description: "For an explanation see \"Differentiation of the Cholesky algorithm\" by\nIain Murray http://arxiv.org/abs/1602.07527."
} }
op { op {
name: "Complex" name: "Complex"
@ -11482,7 +11512,7 @@ op {
} }
} }
summary: "Computes the sum of elements across dimensions of a SparseTensor." summary: "Computes the sum of elements across dimensions of a SparseTensor."
description: "This Op takes a SparseTensor and is the sparse counterpart to\n`tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor`\ninstead of a sparse one.\n\nReduces `sp_input` along the dimensions given in `reduction_axes`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained\nwith length 1.\n\nIf `reduction_axes` has no entries, all dimensions are reduced, and a tensor\nwith a single element is returned." description: "This Op takes a SparseTensor and is the sparse counterpart to\n`tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor`\ninstead of a sparse one.\n\nReduces `sp_input` along the dimensions given in `reduction_axes`. Unless\n`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in\n`reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained\nwith length 1.\n\nIf `reduction_axes` has no entries, all dimensions are reduced, and a tensor\nwith a single element is returned. Additionally, the axes can be negative,\nwhich are interpreted according to the indexing rules in Python."
} }
op { op {
name: "SparseReorder" name: "SparseReorder"


@ -52,11 +52,11 @@ def train():
# Input placehoolders # Input placehoolders
with tf.name_scope('input'): with tf.name_scope('input'):
x = tf.placeholder(tf.float32, [None, 784], name='x-input') x = tf.placeholder(tf.float32, [None, 784], name='x-input')
y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
with tf.name_scope('input_reshape'):
image_shaped_input = tf.reshape(x, [-1, 28, 28, 1]) image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
tf.image_summary('input', image_shaped_input, 10) tf.image_summary('input', image_shaped_input, 10)
y_ = tf.placeholder(tf.float32, [None, 10], name='y-input')
keep_prob = tf.placeholder(tf.float32)
tf.scalar_summary('dropout_keep_probability', keep_prob)
# We can't initialize these variables to 0 - the network will get stuck. # We can't initialize these variables to 0 - the network will get stuck.
def weight_variable(shape): def weight_variable(shape):
@ -105,7 +105,12 @@ def train():
return activations return activations
hidden1 = nn_layer(x, 784, 500, 'layer1') hidden1 = nn_layer(x, 784, 500, 'layer1')
dropped = tf.nn.dropout(hidden1, keep_prob)
with tf.name_scope('dropout'):
keep_prob = tf.placeholder(tf.float32)
tf.scalar_summary('dropout_keep_probability', keep_prob)
dropped = tf.nn.dropout(hidden1, keep_prob)
y = nn_layer(dropped, 500, 10, 'layer2', act=tf.nn.softmax) y = nn_layer(dropped, 500, 10, 'layer2', act=tf.nn.softmax)
with tf.name_scope('cross_entropy'): with tf.name_scope('cross_entropy'):
@ -151,9 +156,20 @@ def train():
summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False)) summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
test_writer.add_summary(summary, i) test_writer.add_summary(summary, i)
print('Accuracy at step %s: %s' % (i, acc)) print('Accuracy at step %s: %s' % (i, acc))
else: # Record train set summarieis, and train else: # Record train set summaries, and train
summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True)) if i % 100 == 99: # Record execution stats
train_writer.add_summary(summary, i) run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
summary, _ = sess.run([merged, train_step],
feed_dict=feed_dict(True),
options=run_options,
run_metadata=run_metadata)
train_writer.add_run_metadata(run_metadata, 'step%d' % i)
train_writer.add_summary(summary, i)
print('Adding run metadata for', i)
else: # Record a summary
summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
train_writer.add_summary(summary, i)
def main(_): def main(_):
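The run metadata collected with `FULL_TRACE` above can also be exported as a Chrome trace. A standalone, hedged sketch, assuming the `timeline` module is available in this build:

```python
import tensorflow as tf
from tensorflow.python.client import timeline

# Trace a trivial op and dump a chrome://tracing file.
a = tf.matmul(tf.ones([4, 4]), tf.ones([4, 4]))
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()
with tf.Session() as sess:
    sess.run(a, options=run_options, run_metadata=run_metadata)

tl = timeline.Timeline(run_metadata.step_stats)
with open('/tmp/timeline.json', 'w') as f:
    f.write(tl.generate_chrome_trace_format())
```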


@ -1338,9 +1338,9 @@ Variance of each batch member.
- - - - - -
### `class tf.contrib.distributions.Gaussian` {#Gaussian} ### `class tf.contrib.distributions.Normal` {#Normal}
The scalar Gaussian distribution with mean and stddev parameters mu, sigma. The scalar Normal distribution with mean and stddev parameters mu, sigma.
#### Mathematical details #### Mathematical details
@ -1353,15 +1353,15 @@ The PDF of this distribution is:
Examples of initialization of one or a batch of distributions. Examples of initialization of one or a batch of distributions.
```python ```python
# Define a single scalar Gaussian distribution. # Define a single scalar Normal distribution.
dist = tf.contrib.distributions.Gaussian(mu=0, sigma=3) dist = tf.contrib.distributions.Normal(mu=0, sigma=3)
# Evaluate the cdf at 1, returning a scalar. # Evaluate the cdf at 1, returning a scalar.
dist.cdf(1) dist.cdf(1)
# Define a batch of two scalar valued Gaussians. # Define a batch of two scalar valued Normals.
# The first has mean 1 and standard deviation 11, the second 2 and 22. # The first has mean 1 and standard deviation 11, the second 2 and 22.
dist = tf.contrib.distributions.Gaussian(mu=[1, 2.], sigma=[11, 22.]) dist = tf.contrib.distributions.Normal(mu=[1, 2.], sigma=[11, 22.])
# Evaluate the pdf of the first distribution on 0, and the second on 1.5, # Evaluate the pdf of the first distribution on 0, and the second on 1.5,
# returning a length two tensor. # returning a length two tensor.
@ -1374,9 +1374,9 @@ dist.sample(3)
Arguments are broadcast when possible. Arguments are broadcast when possible.
```python ```python
# Define a batch of two scalar valued Gaussians. # Define a batch of two scalar valued Normals.
# Both have mean 1, but different standard deviations. # Both have mean 1, but different standard deviations.
dist = tf.contrib.distributions.Gaussian(mu=1, sigma=[11, 22.]) dist = tf.contrib.distributions.Normal(mu=1, sigma=[11, 22.])
# Evaluate the pdf of both distributions on the same point, 3.0, # Evaluate the pdf of both distributions on the same point, 3.0,
# returning a length 2 tensor. # returning a length 2 tensor.
@ -1384,9 +1384,9 @@ dist.pdf(3.0)
``` ```
- - - - - -
#### `tf.contrib.distributions.Gaussian.__init__(mu, sigma, name=None)` {#Gaussian.__init__} #### `tf.contrib.distributions.Normal.__init__(mu, sigma, name=None)` {#Normal.__init__}
Construct Gaussian distributions with mean and stddev `mu` and `sigma`. Construct Normal distributions with mean and stddev `mu` and `sigma`.
The parameters `mu` and `sigma` must be shaped in a way that supports The parameters `mu` and `sigma` must be shaped in a way that supports
broadcasting (e.g. `mu + sigma` is a valid operation). broadcasting (e.g. `mu + sigma` is a valid operation).
@ -1407,9 +1407,9 @@ broadcasting (e.g. `mu + sigma` is a valid operation).
- - - - - -
#### `tf.contrib.distributions.Gaussian.cdf(x, name=None)` {#Gaussian.cdf} #### `tf.contrib.distributions.Normal.cdf(x, name=None)` {#Normal.cdf}
CDF of observations in `x` under these Gaussian distribution(s). CDF of observations in `x` under these Normal distribution(s).
##### Args: ##### Args:
@ -1425,16 +1425,16 @@ CDF of observations in `x` under these Gaussian distribution(s).
- - - - - -
#### `tf.contrib.distributions.Gaussian.dtype` {#Gaussian.dtype} #### `tf.contrib.distributions.Normal.dtype` {#Normal.dtype}
- - - - - -
#### `tf.contrib.distributions.Gaussian.entropy(name=None)` {#Gaussian.entropy} #### `tf.contrib.distributions.Normal.entropy(name=None)` {#Normal.entropy}
The entropy of Gaussian distribution(s). The entropy of Normal distribution(s).
##### Args: ##### Args:
@ -1449,16 +1449,16 @@ The entropy of Gaussian distribution(s).
- - - - - -
#### `tf.contrib.distributions.Gaussian.is_reparameterized` {#Gaussian.is_reparameterized} #### `tf.contrib.distributions.Normal.is_reparameterized` {#Normal.is_reparameterized}
- - - - - -
#### `tf.contrib.distributions.Gaussian.log_cdf(x, name=None)` {#Gaussian.log_cdf} #### `tf.contrib.distributions.Normal.log_cdf(x, name=None)` {#Normal.log_cdf}
Log CDF of observations `x` under these Gaussian distribution(s). Log CDF of observations `x` under these Normal distribution(s).
##### Args: ##### Args:
@ -1474,9 +1474,9 @@ Log CDF of observations `x` under these Gaussian distribution(s).
- - - - - -
#### `tf.contrib.distributions.Gaussian.log_pdf(x, name=None)` {#Gaussian.log_pdf} #### `tf.contrib.distributions.Normal.log_pdf(x, name=None)` {#Normal.log_pdf}
Log pdf of observations in `x` under these Gaussian distribution(s). Log pdf of observations in `x` under these Normal distribution(s).
##### Args: ##### Args:
@ -1492,23 +1492,23 @@ Log pdf of observations in `x` under these Gaussian distribution(s).
- - - - - -
#### `tf.contrib.distributions.Gaussian.mean` {#Gaussian.mean} #### `tf.contrib.distributions.Normal.mean` {#Normal.mean}
- - - - - -
#### `tf.contrib.distributions.Gaussian.mu` {#Gaussian.mu} #### `tf.contrib.distributions.Normal.mu` {#Normal.mu}
- - - - - -
#### `tf.contrib.distributions.Gaussian.pdf(x, name=None)` {#Gaussian.pdf} #### `tf.contrib.distributions.Normal.pdf(x, name=None)` {#Normal.pdf}
The PDF of observations in `x` under these Gaussian distribution(s). The PDF of observations in `x` under these Normal distribution(s).
##### Args: ##### Args:
@ -1524,9 +1524,9 @@ The PDF of observations in `x` under these Gaussian distribution(s).
- - - - - -
#### `tf.contrib.distributions.Gaussian.sample(n, seed=None, name=None)` {#Gaussian.sample} #### `tf.contrib.distributions.Normal.sample(n, seed=None, name=None)` {#Normal.sample}
Sample `n` observations from the Gaussian Distributions. Sample `n` observations from the Normal Distributions.
##### Args: ##### Args:
@ -1544,7 +1544,7 @@ Sample `n` observations from the Gaussian Distributions.
- - - - - -
#### `tf.contrib.distributions.Gaussian.sigma` {#Gaussian.sigma} #### `tf.contrib.distributions.Normal.sigma` {#Normal.sigma}
@ -2443,26 +2443,26 @@ probability includes a combinatorial coefficient.
Functions that transform conjugate prior/likelihood pairs to distributions Functions that transform conjugate prior/likelihood pairs to distributions
representing the posterior or posterior predictive. representing the posterior or posterior predictive.
### Gaussian likelihood with conjugate prior. ### Normal likelihood with conjugate prior.
- - - - - -
### `tf.contrib.distributions.gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n)` {#gaussian_conjugates_known_sigma_posterior} ### `tf.contrib.distributions.normal_conjugates_known_sigma_posterior(prior, sigma, s, n)` {#normal_conjugates_known_sigma_posterior}
Posterior Gaussian distribution with conjugate prior on the mean. Posterior Normal distribution with conjugate prior on the mean.
This model assumes that `n` observations (with sum `s`) come from a This model assumes that `n` observations (with sum `s`) come from a
Gaussian with unknown mean `mu` (described by the Gaussian `prior`) Normal with unknown mean `mu` (described by the Normal `prior`)
and known variance `sigma^2`. The "known sigma posterior" is and known variance `sigma^2`. The "known sigma posterior" is
the distribution of the unknown `mu`. the distribution of the unknown `mu`.
Accepts a prior Gaussian distribution object, having parameters Accepts a prior Normal distribution object, having parameters
`mu0` and `sigma0`, as well as known `sigma` values of the predictive `mu0` and `sigma0`, as well as known `sigma` values of the predictive
distribution(s) (also assumed Gaussian), distribution(s) (also assumed Normal),
and statistical estimates `s` (the sum(s) of the observations) and and statistical estimates `s` (the sum(s) of the observations) and
`n` (the number(s) of observations). `n` (the number(s) of observations).
Returns a posterior (also Gaussian) distribution object, with parameters Returns a posterior (also Normal) distribution object, with parameters
`(mu', sigma'^2)`, where: `(mu', sigma'^2)`, where:
``` ```
@ -2477,7 +2477,7 @@ will broadcast in the case of multidimensional sets of parameters.
##### Args: ##### Args:
* <b>`prior`</b>: `Gaussian` object of type `dtype`: * <b>`prior`</b>: `Normal` object of type `dtype`:
the prior distribution having parameters `(mu0, sigma0)`. the prior distribution having parameters `(mu0, sigma0)`.
* <b>`sigma`</b>: tensor of type `dtype`, taking values `sigma > 0`. * <b>`sigma`</b>: tensor of type `dtype`, taking values `sigma > 0`.
The known stddev parameter(s). The known stddev parameter(s).
@ -2486,35 +2486,35 @@ will broadcast in the case of multidimensional sets of parameters.
##### Returns: ##### Returns:
A new Gaussian posterior distribution object for the unknown observation A new Normal posterior distribution object for the unknown observation
mean `mu`. mean `mu`.
##### Raises: ##### Raises:
* <b>`TypeError`</b>: if dtype of `s` does not match `dtype`, or `prior` is not a * <b>`TypeError`</b>: if dtype of `s` does not match `dtype`, or `prior` is not a
Gaussian object. Normal object.
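A hedged usage sketch for the renamed helper, with illustrative numbers (prior N(0, 1), known sigma 2, ten observations summing to 5):

```python
import tensorflow as tf

prior = tf.contrib.distributions.Normal(mu=0., sigma=1.)
posterior = tf.contrib.distributions.normal_conjugates_known_sigma_posterior(
    prior, sigma=2., s=5., n=10.)

with tf.Session() as sess:
    # Posterior mean and stddev of the unknown observation mean `mu`.
    print(sess.run([posterior.mu, posterior.sigma]))
```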
- - - - - -
### `tf.contrib.distributions.gaussian_congugates_known_sigma_predictive(prior, sigma, s, n)` {#gaussian_congugates_known_sigma_predictive} ### `tf.contrib.distributions.normal_congugates_known_sigma_predictive(prior, sigma, s, n)` {#normal_congugates_known_sigma_predictive}
Posterior predictive Gaussian distribution w. conjugate prior on the mean. Posterior predictive Normal distribution w. conjugate prior on the mean.
This model assumes that `n` observations (with sum `s`) come from a This model assumes that `n` observations (with sum `s`) come from a
Gaussian with unknown mean `mu` (described by the Gaussian `prior`) Normal with unknown mean `mu` (described by the Normal `prior`)
and known variance `sigma^2`. The "known sigma predictive" and known variance `sigma^2`. The "known sigma predictive"
is the distribution of new observations, conditioned on the existing is the distribution of new observations, conditioned on the existing
observations and our prior. observations and our prior.
Accepts a prior Gaussian distribution object, having parameters Accepts a prior Normal distribution object, having parameters
`mu0` and `sigma0`, as well as known `sigma` values of the predictive `mu0` and `sigma0`, as well as known `sigma` values of the predictive
distribution(s) (also assumed Gaussian), distribution(s) (also assumed Normal),
and statistical estimates `s` (the sum(s) of the observations) and and statistical estimates `s` (the sum(s) of the observations) and
`n` (the number(s) of observations). `n` (the number(s) of observations).
Calculates the Gaussian distribution(s) `p(x | sigma^2)`: Calculates the Normal distribution(s) `p(x | sigma^2)`:
``` ```
p(x | sigma^2) = int N(x | mu, sigma^2) N(mu | prior.mu, prior.sigma^2) dmu p(x | sigma^2) = int N(x | mu, sigma^2) N(mu | prior.mu, prior.sigma^2) dmu
@ -2536,7 +2536,7 @@ will broadcast in the case of multidimensional sets of parameters.
##### Args: ##### Args:
* <b>`prior`</b>: `Gaussian` object of type `dtype`: * <b>`prior`</b>: `Normal` object of type `dtype`:
the prior distribution having parameters `(mu0, sigma0)`. the prior distribution having parameters `(mu0, sigma0)`.
* <b>`sigma`</b>: tensor of type `dtype`, taking values `sigma > 0`. * <b>`sigma`</b>: tensor of type `dtype`, taking values `sigma > 0`.
The known stddev parameter(s). The known stddev parameter(s).
@ -2545,12 +2545,12 @@ will broadcast in the case of multidimensional sets of parameters.
##### Returns: ##### Returns:
A new Gaussian predictive distribution object. A new Normal predictive distribution object.
##### Raises: ##### Raises:
* <b>`TypeError`</b>: if dtype of `s` does not match `dtype`, or `prior` is not a * <b>`TypeError`</b>: if dtype of `s` does not match `dtype`, or `prior` is not a
Gaussian object. Normal object.


@ -339,7 +339,7 @@ Optimize weights given a loss.
- - - - - -
### `tf.contrib.layers.optimize_loss(loss, global_step, learning_rate, optimizer, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, moving_average_decay=0.9, learning_rate_decay_fn=None, variables=None, name=None)` {#optimize_loss} ### `tf.contrib.layers.optimize_loss(loss, global_step, learning_rate, optimizer, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, moving_average_decay=0.9, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None)` {#optimize_loss}
Given loss and parameters for optimizer, returns a training op. Given loss and parameters for optimizer, returns a training op.
@ -369,6 +369,8 @@ Given loss and parameters for optimizer, returns a training op.
Can be used to implement any learning rate decay Can be used to implement any learning rate decay
functions. functions.
For example: tf.train.exponential_decay. For example: tf.train.exponential_decay.
* <b>`update_ops`</b>: list of update `Operation`s to execute at each step. If `None`,
uses elements of UPDATE_OPS collection.
* <b>`variables`</b>: list of variables to optimize or * <b>`variables`</b>: list of variables to optimize or
`None` to use all trainable variables. `None` to use all trainable variables.
* <b>`name`</b>: The name for this operation is used to scope operations and summaries. * <b>`name`</b>: The name for this operation is used to scope operations and summaries.
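A hedged sketch of the new `update_ops` argument; the running-mean statistic below is purely illustrative. Ops added to `GraphKeys.UPDATE_OPS` are run before the loss whenever `update_ops` is left as `None`:

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
w = tf.Variable(tf.zeros([4, 1]))
running_mean = tf.Variable(tf.zeros([4]), trainable=False)

# A statistic refreshed on every step but not trained directly.
update_mean = tf.assign(running_mean,
                        0.9 * running_mean + 0.1 * tf.reduce_mean(x, 0))
tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_mean)

loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))
global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = tf.contrib.layers.optimize_loss(
    loss, global_step, learning_rate=0.1, optimizer='SGD')  # update_ops=None

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.run(train_op, feed_dict={x: [[1., 2., 3., 4.]]})
```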


@ -3396,7 +3396,7 @@ Extracts numpy matrix from pandas DataFrame.
- - - - - -
### `tf.contrib.learn.read_batch_examples(file_pattern, batch_size, reader, randomize_input=True, queue_capacity=10000, num_threads=1, name='dequeue_examples')` {#read_batch_examples} ### `tf.contrib.learn.read_batch_examples(file_pattern, batch_size, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, num_threads=1, name=None)` {#read_batch_examples}
Adds operations to read, queue, batch `Example` protos. Adds operations to read, queue, batch `Example` protos.
@ -3418,6 +3418,10 @@ All ops are added to the default graph.
* <b>`reader`</b>: A function or class that returns an object with * <b>`reader`</b>: A function or class that returns an object with
`read` method, (filename tensor) -> (example tensor). `read` method, (filename tensor) -> (example tensor).
* <b>`randomize_input`</b>: Whether the input should be randomized. * <b>`randomize_input`</b>: Whether the input should be randomized.
* <b>`num_epochs`</b>: Integer specifying the number of times to read through the
dataset. If `None`, cycles through the dataset forever.
NOTE - If specified, creates a variable that must be initialized, so call
`tf.initialize_all_variables()` as shown in the tests.
* <b>`queue_capacity`</b>: Capacity for input queue. * <b>`queue_capacity`</b>: Capacity for input queue.
* <b>`num_threads`</b>: The number of threads enqueuing examples. * <b>`num_threads`</b>: The number of threads enqueuing examples.
* <b>`name`</b>: Name of resulting op. * <b>`name`</b>: Name of resulting op.
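A hedged end-to-end sketch of `num_epochs` (the file pattern is illustrative); the epoch counter is the variable that must be initialized, and `OutOfRangeError` signals exhaustion:

```python
import tensorflow as tf

examples = tf.contrib.learn.read_batch_examples(
    file_pattern='/tmp/data/*.tfrecord',   # illustrative path
    batch_size=32, reader=tf.TFRecordReader,
    randomize_input=True, num_epochs=2)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            batch = sess.run(examples)     # serialized Example protos
    except tf.errors.OutOfRangeError:
        pass                               # raised once num_epochs passes are done
    finally:
        coord.request_stop()
        coord.join(threads)
```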
@ -3434,7 +3438,7 @@ All ops are added to the default graph.
- - - - - -
### `tf.contrib.learn.read_batch_features(file_pattern, batch_size, features, reader, randomize_input=True, queue_capacity=10000, num_threads=1, name='dequeue_examples')` {#read_batch_features} ### `tf.contrib.learn.read_batch_features(file_pattern, batch_size, features, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, parser_num_threads=1, name=None)` {#read_batch_features}
Adds operations to read, queue, batch and parse `Example` protos. Adds operations to read, queue, batch and parse `Example` protos.
@ -3459,8 +3463,13 @@ All ops are added to the default graph.
* <b>`reader`</b>: A function or class that returns an object with * <b>`reader`</b>: A function or class that returns an object with
`read` method, (filename tensor) -> (example tensor). `read` method, (filename tensor) -> (example tensor).
* <b>`randomize_input`</b>: Whether the input should be randomized. * <b>`randomize_input`</b>: Whether the input should be randomized.
* <b>`num_epochs`</b>: Integer specifying the number of times to read through the
dataset. If None, cycles through the dataset forever. NOTE - If specified,
creates a variable that must be initialized, so call
tf.initialize_all_variables() as shown in the tests.
* <b>`queue_capacity`</b>: Capacity for input queue. * <b>`queue_capacity`</b>: Capacity for input queue.
* <b>`num_threads`</b>: The number of threads enqueuing examples. * <b>`reader_num_threads`</b>: The number of threads to read examples.
* <b>`parser_num_threads`</b>: The number of threads to parse examples.
* <b>`name`</b>: Name of resulting op. * <b>`name`</b>: Name of resulting op.
##### Returns: ##### Returns:
@ -3475,7 +3484,7 @@ All ops are added to the default graph.
- - - - - -
### `tf.contrib.learn.read_batch_record_features(file_pattern, batch_size, features, randomize_input=True, queue_capacity=10000, num_threads=1, name='dequeue_record_examples')` {#read_batch_record_features} ### `tf.contrib.learn.read_batch_record_features(file_pattern, batch_size, features, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, parser_num_threads=1, name='dequeue_record_examples')` {#read_batch_record_features}
Reads TFRecord, queues, batches and parses `Example` proto. Reads TFRecord, queues, batches and parses `Example` proto.
@ -3490,8 +3499,13 @@ See more detailed description in `read_examples`.
* <b>`features`</b>: A `dict` mapping feature keys to `FixedLenFeature` or * <b>`features`</b>: A `dict` mapping feature keys to `FixedLenFeature` or
`VarLenFeature` values. `VarLenFeature` values.
* <b>`randomize_input`</b>: Whether the input should be randomized. * <b>`randomize_input`</b>: Whether the input should be randomized.
* <b>`num_epochs`</b>: Integer specifying the number of times to read through the
dataset. If None, cycles through the dataset forever. NOTE - If specified,
creates a variable that must be initialized, so call
tf.initialize_all_variables() as shown in the tests.
* <b>`queue_capacity`</b>: Capacity for input queue. * <b>`queue_capacity`</b>: Capacity for input queue.
* <b>`num_threads`</b>: The number of threads enqueuing examples. * <b>`reader_num_threads`</b>: The number of threads to read examples.
* <b>`parser_num_threads`</b>: The number of threads to parse examples.
* <b>`name`</b>: Name of resulting op. * <b>`name`</b>: Name of resulting op.
##### Returns: ##### Returns:


@ -1,19 +1,19 @@
### `tf.contrib.distributions.gaussian_conjugates_known_sigma_posterior(prior, sigma, s, n)` {#gaussian_conjugates_known_sigma_posterior} ### `tf.contrib.distributions.normal_conjugates_known_sigma_posterior(prior, sigma, s, n)` {#normal_conjugates_known_sigma_posterior}
Posterior Gaussian distribution with conjugate prior on the mean. Posterior Normal distribution with conjugate prior on the mean.
This model assumes that `n` observations (with sum `s`) come from a This model assumes that `n` observations (with sum `s`) come from a
Gaussian with unknown mean `mu` (described by the Gaussian `prior`) Normal with unknown mean `mu` (described by the Normal `prior`)
and known variance `sigma^2`. The "known sigma posterior" is and known variance `sigma^2`. The "known sigma posterior" is
the distribution of the unknown `mu`. the distribution of the unknown `mu`.
Accepts a prior Gaussian distribution object, having parameters Accepts a prior Normal distribution object, having parameters
`mu0` and `sigma0`, as well as known `sigma` values of the predictive `mu0` and `sigma0`, as well as known `sigma` values of the predictive
distribution(s) (also assumed Gaussian), distribution(s) (also assumed Normal),
and statistical estimates `s` (the sum(s) of the observations) and and statistical estimates `s` (the sum(s) of the observations) and
`n` (the number(s) of observations). `n` (the number(s) of observations).
Returns a posterior (also Gaussian) distribution object, with parameters Returns a posterior (also Normal) distribution object, with parameters
`(mu', sigma'^2)`, where: `(mu', sigma'^2)`, where:
``` ```
@ -28,7 +28,7 @@ will broadcast in the case of multidimensional sets of parameters.
##### Args: ##### Args:
* <b>`prior`</b>: `Gaussian` object of type `dtype`: * <b>`prior`</b>: `Normal` object of type `dtype`:
the prior distribution having parameters `(mu0, sigma0)`. the prior distribution having parameters `(mu0, sigma0)`.
* <b>`sigma`</b>: tensor of type `dtype`, taking values `sigma > 0`. * <b>`sigma`</b>: tensor of type `dtype`, taking values `sigma > 0`.
The known stddev parameter(s). The known stddev parameter(s).
@ -37,12 +37,12 @@ will broadcast in the case of multidimensional sets of parameters.
##### Returns: ##### Returns:
A new Gaussian posterior distribution object for the unknown observation A new Normal posterior distribution object for the unknown observation
mean `mu`. mean `mu`.
##### Raises: ##### Raises:
* <b>`TypeError`</b>: if dtype of `s` does not match `dtype`, or `prior` is not a * <b>`TypeError`</b>: if dtype of `s` does not match `dtype`, or `prior` is not a
Gaussian object. Normal object.


@ -1,4 +1,4 @@
### `tf.contrib.learn.read_batch_record_features(file_pattern, batch_size, features, randomize_input=True, queue_capacity=10000, num_threads=1, name='dequeue_record_examples')` {#read_batch_record_features} ### `tf.contrib.learn.read_batch_record_features(file_pattern, batch_size, features, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, parser_num_threads=1, name='dequeue_record_examples')` {#read_batch_record_features}
Reads TFRecord, queues, batches and parses `Example` proto. Reads TFRecord, queues, batches and parses `Example` proto.
@ -13,8 +13,13 @@ See more detailed description in `read_examples`.
* <b>`features`</b>: A `dict` mapping feature keys to `FixedLenFeature` or * <b>`features`</b>: A `dict` mapping feature keys to `FixedLenFeature` or
`VarLenFeature` values. `VarLenFeature` values.
* <b>`randomize_input`</b>: Whether the input should be randomized. * <b>`randomize_input`</b>: Whether the input should be randomized.
* <b>`num_epochs`</b>: Integer specifying the number of times to read through the
dataset. If None, cycles through the dataset forever. NOTE - If specified,
creates a variable that must be initialized, so call
tf.initialize_all_variables() as shown in the tests.
* <b>`queue_capacity`</b>: Capacity for input queue. * <b>`queue_capacity`</b>: Capacity for input queue.
* <b>`num_threads`</b>: The number of threads enqueuing examples. * <b>`reader_num_threads`</b>: The number of threads to read examples.
* <b>`parser_num_threads`</b>: The number of threads to parse examples.
* <b>`name`</b>: Name of resulting op. * <b>`name`</b>: Name of resulting op.
##### Returns: ##### Returns:
